From: Jaime Marquínez Ferrándiz Date: Wed, 18 Sep 2013 21:04:54 +0000 (+0200) Subject: Merge pull request #1438 from rzhxeo/fktv X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=commitdiff_plain;h=6c36d8d6fb39b686ae5b475868de3523b6c76f76;hp=0761d02b0baf20955bd6e4f53568a3bbaa75ab5c;p=youtube-dl Merge pull request #1438 from rzhxeo/fktv Add support for http://fernsehkritik.tv --- diff --git a/devscripts/buildserver.py b/devscripts/buildserver.py new file mode 100644 index 000000000..e0c3cc83e --- /dev/null +++ b/devscripts/buildserver.py @@ -0,0 +1,405 @@ +#!/usr/bin/python3 + +from http.server import HTTPServer, BaseHTTPRequestHandler +from socketserver import ThreadingMixIn +import argparse +import ctypes +import functools +import sys +import threading +import traceback +import os.path + + +class BuildHTTPServer(ThreadingMixIn, HTTPServer): + allow_reuse_address = True + + +advapi32 = ctypes.windll.advapi32 + +SC_MANAGER_ALL_ACCESS = 0xf003f +SC_MANAGER_CREATE_SERVICE = 0x02 +SERVICE_WIN32_OWN_PROCESS = 0x10 +SERVICE_AUTO_START = 0x2 +SERVICE_ERROR_NORMAL = 0x1 +DELETE = 0x00010000 +SERVICE_STATUS_START_PENDING = 0x00000002 +SERVICE_STATUS_RUNNING = 0x00000004 +SERVICE_ACCEPT_STOP = 0x1 + +SVCNAME = 'youtubedl_builder' + +LPTSTR = ctypes.c_wchar_p +START_CALLBACK = ctypes.WINFUNCTYPE(None, ctypes.c_int, ctypes.POINTER(LPTSTR)) + + +class SERVICE_TABLE_ENTRY(ctypes.Structure): + _fields_ = [ + ('lpServiceName', LPTSTR), + ('lpServiceProc', START_CALLBACK) + ] + + +HandlerEx = ctypes.WINFUNCTYPE( + ctypes.c_int, # return + ctypes.c_int, # dwControl + ctypes.c_int, # dwEventType + ctypes.c_void_p, # lpEventData, + ctypes.c_void_p, # lpContext, +) + + +def _ctypes_array(c_type, py_array): + ar = (c_type * len(py_array))() + ar[:] = py_array + return ar + + +def win_OpenSCManager(): + res = advapi32.OpenSCManagerW(None, None, SC_MANAGER_ALL_ACCESS) + if not res: + raise Exception('Opening service manager failed - ' + 'are you running this as administrator?') + return res + + +def win_install_service(service_name, cmdline): + manager = win_OpenSCManager() + try: + h = advapi32.CreateServiceW( + manager, service_name, None, + SC_MANAGER_CREATE_SERVICE, SERVICE_WIN32_OWN_PROCESS, + SERVICE_AUTO_START, SERVICE_ERROR_NORMAL, + cmdline, None, None, None, None, None) + if not h: + raise OSError('Service creation failed: %s' % ctypes.FormatError()) + + advapi32.CloseServiceHandle(h) + finally: + advapi32.CloseServiceHandle(manager) + + +def win_uninstall_service(service_name): + manager = win_OpenSCManager() + try: + h = advapi32.OpenServiceW(manager, service_name, DELETE) + if not h: + raise OSError('Could not find service %s: %s' % ( + service_name, ctypes.FormatError())) + + try: + if not advapi32.DeleteService(h): + raise OSError('Deletion failed: %s' % ctypes.FormatError()) + finally: + advapi32.CloseServiceHandle(h) + finally: + advapi32.CloseServiceHandle(manager) + + +def win_service_report_event(service_name, msg, is_error=True): + with open('C:/sshkeys/log', 'a', encoding='utf-8') as f: + f.write(msg + '\n') + + event_log = advapi32.RegisterEventSourceW(None, service_name) + if not event_log: + raise OSError('Could not report event: %s' % ctypes.FormatError()) + + try: + type_id = 0x0001 if is_error else 0x0004 + event_id = 0xc0000000 if is_error else 0x40000000 + lines = _ctypes_array(LPTSTR, [msg]) + + if not advapi32.ReportEventW( + event_log, type_id, 0, event_id, None, len(lines), 0, + lines, None): + raise OSError('Event reporting failed: %s' % ctypes.FormatError()) + finally: + advapi32.DeregisterEventSource(event_log) + + +def win_service_handler(stop_event, *args): + try: + raise ValueError('Handler called with args ' + repr(args)) + TODO + except Exception as e: + tb = traceback.format_exc() + msg = str(e) + '\n' + tb + win_service_report_event(service_name, msg, is_error=True) + raise + + +def win_service_set_status(handle, status_code): + svcStatus = SERVICE_STATUS() + svcStatus.dwServiceType = SERVICE_WIN32_OWN_PROCESS + svcStatus.dwCurrentState = status_code + svcStatus.dwControlsAccepted = SERVICE_ACCEPT_STOP + + svcStatus.dwServiceSpecificExitCode = 0 + + if not advapi32.SetServiceStatus(handle, ctypes.byref(svcStatus)): + raise OSError('SetServiceStatus failed: %r' % ctypes.FormatError()) + + +def win_service_main(service_name, real_main, argc, argv_raw): + try: + #args = [argv_raw[i].value for i in range(argc)] + stop_event = threading.Event() + handler = HandlerEx(functools.partial(stop_event, win_service_handler)) + h = advapi32.RegisterServiceCtrlHandlerExW(service_name, handler, None) + if not h: + raise OSError('Handler registration failed: %s' % + ctypes.FormatError()) + + TODO + except Exception as e: + tb = traceback.format_exc() + msg = str(e) + '\n' + tb + win_service_report_event(service_name, msg, is_error=True) + raise + + +def win_service_start(service_name, real_main): + try: + cb = START_CALLBACK( + functools.partial(win_service_main, service_name, real_main)) + dispatch_table = _ctypes_array(SERVICE_TABLE_ENTRY, [ + SERVICE_TABLE_ENTRY( + service_name, + cb + ), + SERVICE_TABLE_ENTRY(None, ctypes.cast(None, START_CALLBACK)) + ]) + + if not advapi32.StartServiceCtrlDispatcherW(dispatch_table): + raise OSError('ctypes start failed: %s' % ctypes.FormatError()) + except Exception as e: + tb = traceback.format_exc() + msg = str(e) + '\n' + tb + win_service_report_event(service_name, msg, is_error=True) + raise + + +def main(args=None): + parser = argparse.ArgumentParser() + parser.add_argument('-i', '--install', + action='store_const', dest='action', const='install', + help='Launch at Windows startup') + parser.add_argument('-u', '--uninstall', + action='store_const', dest='action', const='uninstall', + help='Remove Windows service') + parser.add_argument('-s', '--service', + action='store_const', dest='action', const='service', + help='Run as a Windows service') + parser.add_argument('-b', '--bind', metavar='', + action='store', default='localhost:8142', + help='Bind to host:port (default %default)') + options = parser.parse_args(args=args) + + if options.action == 'install': + fn = os.path.abspath(__file__).replace('v:', '\\\\vboxsrv\\vbox') + cmdline = '%s %s -s -b %s' % (sys.executable, fn, options.bind) + win_install_service(SVCNAME, cmdline) + return + + if options.action == 'uninstall': + win_uninstall_service(SVCNAME) + return + + if options.action == 'service': + win_service_start(SVCNAME, main) + return + + host, port_str = options.bind.split(':') + port = int(port_str) + + print('Listening on %s:%d' % (host, port)) + srv = BuildHTTPServer((host, port), BuildHTTPRequestHandler) + thr = threading.Thread(target=srv.serve_forever) + thr.start() + input('Press ENTER to shut down') + srv.shutdown() + thr.join() + + +def rmtree(path): + for name in os.listdir(path): + fname = os.path.join(path, name) + if os.path.isdir(fname): + rmtree(fname) + else: + os.chmod(fname, 0o666) + os.remove(fname) + os.rmdir(path) + +#============================================================================== + +class BuildError(Exception): + def __init__(self, output, code=500): + self.output = output + self.code = code + + def __str__(self): + return self.output + + +class HTTPError(BuildError): + pass + + +class PythonBuilder(object): + def __init__(self, **kwargs): + pythonVersion = kwargs.pop('python', '2.7') + try: + key = _winreg.OpenKey(_winreg.HKEY_LOCAL_MACHINE, r'SOFTWARE\Python\PythonCore\%s\InstallPath' % pythonVersion) + try: + self.pythonPath, _ = _winreg.QueryValueEx(key, '') + finally: + _winreg.CloseKey(key) + except Exception: + raise BuildError('No such Python version: %s' % pythonVersion) + + super(PythonBuilder, self).__init__(**kwargs) + + +class GITInfoBuilder(object): + def __init__(self, **kwargs): + try: + self.user, self.repoName = kwargs['path'][:2] + self.rev = kwargs.pop('rev') + except ValueError: + raise BuildError('Invalid path') + except KeyError as e: + raise BuildError('Missing mandatory parameter "%s"' % e.args[0]) + + path = os.path.join(os.environ['APPDATA'], 'Build archive', self.repoName, self.user) + if not os.path.exists(path): + os.makedirs(path) + self.basePath = tempfile.mkdtemp(dir=path) + self.buildPath = os.path.join(self.basePath, 'build') + + super(GITInfoBuilder, self).__init__(**kwargs) + + +class GITBuilder(GITInfoBuilder): + def build(self): + try: + subprocess.check_output(['git', 'clone', 'git://github.com/%s/%s.git' % (self.user, self.repoName), self.buildPath]) + subprocess.check_output(['git', 'checkout', self.rev], cwd=self.buildPath) + except subprocess.CalledProcessError as e: + raise BuildError(e.output) + + super(GITBuilder, self).build() + + +class YoutubeDLBuilder(object): + authorizedUsers = ['fraca7', 'phihag', 'rg3', 'FiloSottile'] + + def __init__(self, **kwargs): + if self.repoName != 'youtube-dl': + raise BuildError('Invalid repository "%s"' % self.repoName) + if self.user not in self.authorizedUsers: + raise HTTPError('Unauthorized user "%s"' % self.user, 401) + + super(YoutubeDLBuilder, self).__init__(**kwargs) + + def build(self): + try: + subprocess.check_output([os.path.join(self.pythonPath, 'python.exe'), 'setup.py', 'py2exe'], + cwd=self.buildPath) + except subprocess.CalledProcessError as e: + raise BuildError(e.output) + + super(YoutubeDLBuilder, self).build() + + +class DownloadBuilder(object): + def __init__(self, **kwargs): + self.handler = kwargs.pop('handler') + self.srcPath = os.path.join(self.buildPath, *tuple(kwargs['path'][2:])) + self.srcPath = os.path.abspath(os.path.normpath(self.srcPath)) + if not self.srcPath.startswith(self.buildPath): + raise HTTPError(self.srcPath, 401) + + super(DownloadBuilder, self).__init__(**kwargs) + + def build(self): + if not os.path.exists(self.srcPath): + raise HTTPError('No such file', 404) + if os.path.isdir(self.srcPath): + raise HTTPError('Is a directory: %s' % self.srcPath, 401) + + self.handler.send_response(200) + self.handler.send_header('Content-Type', 'application/octet-stream') + self.handler.send_header('Content-Disposition', 'attachment; filename=%s' % os.path.split(self.srcPath)[-1]) + self.handler.send_header('Content-Length', str(os.stat(self.srcPath).st_size)) + self.handler.end_headers() + + with open(self.srcPath, 'rb') as src: + shutil.copyfileobj(src, self.handler.wfile) + + super(DownloadBuilder, self).build() + + +class CleanupTempDir(object): + def build(self): + try: + rmtree(self.basePath) + except Exception as e: + print('WARNING deleting "%s": %s' % (self.basePath, e)) + + super(CleanupTempDir, self).build() + + +class Null(object): + def __init__(self, **kwargs): + pass + + def start(self): + pass + + def close(self): + pass + + def build(self): + pass + + +class Builder(PythonBuilder, GITBuilder, YoutubeDLBuilder, DownloadBuilder, CleanupTempDir, Null): + pass + + +class BuildHTTPRequestHandler(BaseHTTPRequestHandler): + actionDict = { 'build': Builder, 'download': Builder } # They're the same, no more caching. + + def do_GET(self): + path = urlparse.urlparse(self.path) + paramDict = dict([(key, value[0]) for key, value in urlparse.parse_qs(path.query).items()]) + action, _, path = path.path.strip('/').partition('/') + if path: + path = path.split('/') + if action in self.actionDict: + try: + builder = self.actionDict[action](path=path, handler=self, **paramDict) + builder.start() + try: + builder.build() + finally: + builder.close() + except BuildError as e: + self.send_response(e.code) + msg = unicode(e).encode('UTF-8') + self.send_header('Content-Type', 'text/plain; charset=UTF-8') + self.send_header('Content-Length', len(msg)) + self.end_headers() + self.wfile.write(msg) + except HTTPError as e: + self.send_response(e.code, str(e)) + else: + self.send_response(500, 'Unknown build method "%s"' % action) + else: + self.send_response(500, 'Malformed URL') + +#============================================================================== + +if __name__ == '__main__': + main() diff --git a/devscripts/release.sh b/devscripts/release.sh index 62c68a6cf..796468b4b 100755 --- a/devscripts/release.sh +++ b/devscripts/release.sh @@ -55,8 +55,8 @@ git push origin "$version" /bin/echo -e "\n### OK, now it is time to build the binaries..." REV=$(git rev-parse HEAD) make youtube-dl youtube-dl.tar.gz -wget "http://jeromelaheurte.net:8142/download/rg3/youtube-dl/youtube-dl.exe?rev=$REV" -O youtube-dl.exe || \ - wget "http://jeromelaheurte.net:8142/build/rg3/youtube-dl/youtube-dl.exe?rev=$REV" -O youtube-dl.exe +read -p "VM running? (y/n) " -n 1 +wget "http://localhost:8142/build/rg3/youtube-dl/youtube-dl.exe?rev=$REV" -O youtube-dl.exe mkdir -p "build/$version" mv youtube-dl youtube-dl.exe "build/$version" mv youtube-dl.tar.gz "build/$version/youtube-dl-$version.tar.gz" diff --git a/devscripts/youtube_genalgo.py b/devscripts/youtube_genalgo.py index b390c7e2e..66019ee55 100644 --- a/devscripts/youtube_genalgo.py +++ b/devscripts/youtube_genalgo.py @@ -24,8 +24,8 @@ tests = [ ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<", "yuioplkjhgfdsazxcvbnm12345678q0QWrRTYUIOELKJHGFD-AZXCVBNM!@#$%^&*()_<+={[|};?/>.S"), # 85 - vflkuzxcs 2013/09/11 - ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<", - "T>/?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOvUY.REWQ0987654321mnbqcxzasdfghjklpoiuytr"), + ('0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!"#$%&\'()*+,-./:;<=>?@[', + '3456789a0cdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRS[UVWXYZ!"#$%&\'()*+,-./:;<=>?@'), # 84 - vflg0g8PQ 2013/08/29 (sporadic) ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<", ">?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWq0987654321mnbvcxzasdfghjklpoiuytr"), diff --git a/test/test_all_urls.py b/test/test_all_urls.py index 99fc7bd28..ff1c86efe 100644 --- a/test/test_all_urls.py +++ b/test/test_all_urls.py @@ -36,6 +36,7 @@ class TestAllURLsMatching(unittest.TestCase): self.assertFalse(YoutubeIE.suitable(u'https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012')) #668 self.assertMatch('http://youtu.be/BaW_jenozKc', ['youtube']) self.assertMatch('http://www.youtube.com/v/BaW_jenozKc', ['youtube']) + self.assertMatch('https://youtube.googleapis.com/v/BaW_jenozKc', ['youtube']) def test_youtube_channel_matching(self): assertChannel = lambda url: self.assertMatch(url, ['youtube:channel']) diff --git a/test/test_dailymotion_subtitles.py b/test/test_dailymotion_subtitles.py index bcd9f79f6..83c65d57e 100644 --- a/test/test_dailymotion_subtitles.py +++ b/test/test_dailymotion_subtitles.py @@ -40,6 +40,7 @@ class TestDailymotionSubtitles(unittest.TestCase): subtitles = self.getSubtitles() self.assertEqual(md5(subtitles['fr']), '594564ec7d588942e384e920e5341792') def test_allsubtitles(self): + self.DL.params['writesubtitles'] = True self.DL.params['allsubtitles'] = True subtitles = self.getSubtitles() self.assertEqual(len(subtitles.keys()), 5) @@ -54,6 +55,7 @@ class TestDailymotionSubtitles(unittest.TestCase): self.assertTrue(len(subtitles.keys()) == 0) def test_nosubtitles(self): self.url = 'http://www.dailymotion.com/video/x12u166_le-zapping-tele-star-du-08-aout-2013_tv' + self.DL.params['writesubtitles'] = True self.DL.params['allsubtitles'] = True subtitles = self.getSubtitles() self.assertEqual(len(subtitles), 0) diff --git a/test/test_playlists.py b/test/test_playlists.py index 4a2e00b01..d079a4f23 100644 --- a/test/test_playlists.py +++ b/test/test_playlists.py @@ -8,7 +8,7 @@ import json import os sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -from youtube_dl.extractor import DailymotionPlaylistIE, VimeoChannelIE, UstreamChannelIE +from youtube_dl.extractor import DailymotionPlaylistIE, VimeoChannelIE, UstreamChannelIE, SoundcloudUserIE from youtube_dl.utils import * from helper import FakeYDL @@ -42,5 +42,13 @@ class TestPlaylists(unittest.TestCase): self.assertEqual(result['id'], u'5124905') self.assertTrue(len(result['entries']) >= 11) + def test_soundcloud_user(self): + dl = FakeYDL() + ie = SoundcloudUserIE(dl) + result = ie.extract('https://soundcloud.com/the-concept-band') + self.assertIsPlaylist(result) + self.assertEqual(result['id'], u'9615865') + self.assertTrue(len(result['entries']) >= 12) + if __name__ == '__main__': unittest.main() diff --git a/test/test_youtube_subtitles.py b/test/test_youtube_subtitles.py index 5632871ac..168e6c66c 100644 --- a/test/test_youtube_subtitles.py +++ b/test/test_youtube_subtitles.py @@ -41,6 +41,7 @@ class TestYoutubeSubtitles(unittest.TestCase): subtitles = self.getSubtitles() self.assertEqual(md5(subtitles['it']), '164a51f16f260476a05b50fe4c2f161d') def test_youtube_allsubtitles(self): + self.DL.params['writesubtitles'] = True self.DL.params['allsubtitles'] = True subtitles = self.getSubtitles() self.assertEqual(len(subtitles.keys()), 13) @@ -66,6 +67,7 @@ class TestYoutubeSubtitles(unittest.TestCase): self.assertTrue(subtitles['it'] is not None) def test_youtube_nosubtitles(self): self.url = 'sAjKT8FhjI8' + self.DL.params['writesubtitles'] = True self.DL.params['allsubtitles'] = True subtitles = self.getSubtitles() self.assertEqual(len(subtitles), 0) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index c2f992b8e..de2b133e0 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -74,6 +74,7 @@ class YoutubeDL(object): writesubtitles: Write the video subtitles to a file writeautomaticsub: Write the automatic subtitles to a file allsubtitles: Downloads all the subtitles of the video + (requires writesubtitles or writeautomaticsub) listsubtitles: Lists all available subtitles for the video subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt) subtitleslangs: List of languages of the subtitles to download @@ -141,14 +142,10 @@ class YoutubeDL(object): def to_screen(self, message, skip_eol=False): """Print message to stdout if not in quiet mode.""" - assert type(message) == type(u'') if not self.params.get('quiet', False): terminator = [u'\n', u''][skip_eol] output = message + terminator - if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr - output = output.encode(preferredencoding(), 'ignore') - self._screen_file.write(output) - self._screen_file.flush() + write_string(output, self._screen_file) def to_stderr(self, message): """Print message to stderr.""" @@ -499,8 +496,7 @@ class YoutubeDL(object): return subtitles_are_requested = any([self.params.get('writesubtitles', False), - self.params.get('writeautomaticsub'), - self.params.get('allsubtitles', False)]) + self.params.get('writeautomaticsub')]) if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']: # subtitles download errors are already managed as troubles in relevant IE diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 696e54f49..df4feefe7 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -30,6 +30,7 @@ __authors__ = ( 'Pierre Rudloff', 'Huarong Huo', 'Ismael Mejía', + 'Steffan \'Ruirize\' James', ) __license__ = 'Public Domain' @@ -149,7 +150,7 @@ def parseOpts(overrideArguments=None): general.add_option('-U', '--update', action='store_true', dest='update_self', help='update this program to latest version. Make sure that you have sufficient permissions (run with sudo if needed)') general.add_option('-i', '--ignore-errors', - action='store_true', dest='ignoreerrors', help='continue on download errors', default=False) + action='store_true', dest='ignoreerrors', help='continue on download errors, for example to to skip unavailable videos in a playlist', default=False) general.add_option('--dump-user-agent', action='store_true', dest='dump_user_agent', help='display the current browser identification', default=False) @@ -354,7 +355,7 @@ def parseOpts(overrideArguments=None): if overrideArguments is not None: opts, args = parser.parse_args(overrideArguments) if opts.verbose: - sys.stderr.write(u'[debug] Override config: ' + repr(overrideArguments) + '\n') + write_string(u'[debug] Override config: ' + repr(overrideArguments) + '\n') else: xdg_config_home = os.environ.get('XDG_CONFIG_HOME') if xdg_config_home: @@ -367,9 +368,9 @@ def parseOpts(overrideArguments=None): argv = systemConf + userConf + commandLineConf opts, args = parser.parse_args(argv) if opts.verbose: - sys.stderr.write(u'[debug] System config: ' + repr(_hide_login_info(systemConf)) + '\n') - sys.stderr.write(u'[debug] User config: ' + repr(_hide_login_info(userConf)) + '\n') - sys.stderr.write(u'[debug] Command-line args: ' + repr(_hide_login_info(commandLineConf)) + '\n') + write_string(u'[debug] System config: ' + repr(_hide_login_info(systemConf)) + '\n') + write_string(u'[debug] User config: ' + repr(_hide_login_info(userConf)) + '\n') + write_string(u'[debug] Command-line args: ' + repr(_hide_login_info(commandLineConf)) + '\n') return parser, opts, args @@ -392,7 +393,7 @@ def _real_main(argv=None): except (IOError, OSError) as err: if opts.verbose: traceback.print_exc() - sys.stderr.write(u'ERROR: unable to open cookie file\n') + write_string(u'ERROR: unable to open cookie file\n') sys.exit(101) # Set user agent if opts.user_agent is not None: @@ -419,7 +420,7 @@ def _real_main(argv=None): batchurls = [x.strip() for x in batchurls] batchurls = [x for x in batchurls if len(x) > 0 and not re.search(r'^[#/;]', x)] if opts.verbose: - sys.stderr.write(u'[debug] Batch file urls: ' + repr(batchurls) + u'\n') + write_string(u'[debug] Batch file urls: ' + repr(batchurls) + u'\n') except IOError: sys.exit(u'ERROR: batch file could not be read') all_urls = batchurls + args @@ -533,6 +534,11 @@ def _real_main(argv=None): else: date = DateRange(opts.dateafter, opts.datebefore) + # --all-sub automatically sets --write-sub if --write-auto-sub is not given + # this was the old behaviour if only --all-sub was given. + if opts.allsubtitles and (opts.writeautomaticsub == False): + opts.writesubtitles = True + if sys.version_info < (3,): # In Python 2, sys.argv is a bytestring (also note http://bugs.python.org/issue2128 for Windows systems) if opts.outtmpl is not None: @@ -606,7 +612,7 @@ def _real_main(argv=None): }) if opts.verbose: - sys.stderr.write(u'[debug] youtube-dl version ' + __version__ + u'\n') + write_string(u'[debug] youtube-dl version ' + __version__ + u'\n') try: sp = subprocess.Popen( ['git', 'rev-parse', '--short', 'HEAD'], @@ -615,14 +621,14 @@ def _real_main(argv=None): out, err = sp.communicate() out = out.decode().strip() if re.match('[0-9a-f]+', out): - sys.stderr.write(u'[debug] Git HEAD: ' + out + u'\n') + write_string(u'[debug] Git HEAD: ' + out + u'\n') except: try: sys.exc_clear() except: pass - sys.stderr.write(u'[debug] Python version %s - %s' %(platform.python_version(), platform_name()) + u'\n') - sys.stderr.write(u'[debug] Proxy map: ' + str(proxy_handler.proxies) + u'\n') + write_string(u'[debug] Python version %s - %s' %(platform.python_version(), platform_name()) + u'\n') + write_string(u'[debug] Proxy map: ' + str(proxy_handler.proxies) + u'\n') ydl.add_default_info_extractors() diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 25a8e3cf5..726c9fa15 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -6,6 +6,7 @@ from .arte import ArteTvIE from .auengine import AUEngineIE from .bandcamp import BandcampIE from .bliptv import BlipTVIE, BlipTVUserIE +from .bloomberg import BloombergIE from .breakcom import BreakIE from .brightcove import BrightcoveIE from .c56 import C56IE @@ -71,6 +72,7 @@ from .myvideo import MyVideoIE from .naver import NaverIE from .nba import NBAIE from .nbc import NBCNewsIE +from .newgrounds import NewgroundsIE from .ooyala import OoyalaIE from .orf import ORFIE from .pbs import PBSIE @@ -86,7 +88,8 @@ from .sina import SinaIE from .slashdot import SlashdotIE from .slideshare import SlideshareIE from .sohu import SohuIE -from .soundcloud import SoundcloudIE, SoundcloudSetIE +from .soundcloud import SoundcloudIE, SoundcloudSetIE, SoundcloudUserIE +from .southparkstudios import SouthParkStudiosIE from .spiegel import SpiegelIE from .stanfordoc import StanfordOpenClassroomIE from .statigram import StatigramIE @@ -106,6 +109,7 @@ from .vbox7 import Vbox7IE from .veehd import VeeHDIE from .veoh import VeohIE from .vevo import VevoIE +from .vice import ViceIE from .videofyme import VideofyMeIE from .vimeo import VimeoIE, VimeoChannelIE from .vine import VineIE diff --git a/youtube_dl/extractor/archiveorg.py b/youtube_dl/extractor/archiveorg.py index 7efd1d823..61ce4469a 100644 --- a/youtube_dl/extractor/archiveorg.py +++ b/youtube_dl/extractor/archiveorg.py @@ -46,6 +46,8 @@ class ArchiveOrgIE(InfoExtractor): for fn,fdata in data['files'].items() if 'Video' in fdata['format']] formats.sort(key=lambda fdata: fdata['file_size']) + for f in formats: + f['ext'] = determine_ext(f['url']) info = { '_type': 'video', @@ -61,7 +63,6 @@ class ArchiveOrgIE(InfoExtractor): info['thumbnail'] = thumbnail # TODO: Remove when #980 has been merged - info['url'] = formats[-1]['url'] - info['ext'] = determine_ext(formats[-1]['url']) + info.update(formats[-1]) - return info \ No newline at end of file + return info diff --git a/youtube_dl/extractor/bloomberg.py b/youtube_dl/extractor/bloomberg.py new file mode 100644 index 000000000..3666a780b --- /dev/null +++ b/youtube_dl/extractor/bloomberg.py @@ -0,0 +1,27 @@ +import re + +from .common import InfoExtractor + + +class BloombergIE(InfoExtractor): + _VALID_URL = r'https?://www\.bloomberg\.com/video/(?P.+?).html' + + _TEST = { + u'url': u'http://www.bloomberg.com/video/shah-s-presentation-on-foreign-exchange-strategies-qurhIVlJSB6hzkVi229d8g.html', + u'file': u'12bzhqZTqQHmmlA8I-i0NpzJgcG5NNYX.mp4', + u'info_dict': { + u'title': u'Shah\'s Presentation on Foreign-Exchange Strategies', + u'description': u'md5:abc86e5236f9f0e4866c59ad36736686', + }, + u'params': { + # Requires ffmpeg (m3u8 manifest) + u'skip_download': True, + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + name = mobj.group('name') + webpage = self._download_webpage(url, name) + ooyala_url = self._og_search_video_url(webpage) + return self.url_result(ooyala_url, ie='Ooyala') diff --git a/youtube_dl/extractor/dreisat.py b/youtube_dl/extractor/dreisat.py index 64b465805..765cb1f37 100644 --- a/youtube_dl/extractor/dreisat.py +++ b/youtube_dl/extractor/dreisat.py @@ -54,6 +54,7 @@ class DreiSatIE(InfoExtractor): 'width': int(fe.find('./width').text), 'height': int(fe.find('./height').text), 'url': fe.find('./url').text, + 'ext': determine_ext(fe.find('./url').text), 'filesize': int(fe.find('./filesize').text), 'video_bitrate': int(fe.find('./videoBitrate').text), '3sat_qualityname': fe.find('./quality').text, @@ -79,7 +80,6 @@ class DreiSatIE(InfoExtractor): } # TODO: Remove when #980 has been merged - info['url'] = formats[-1]['url'] - info['ext'] = determine_ext(formats[-1]['url']) + info.update(formats[-1]) - return info \ No newline at end of file + return info diff --git a/youtube_dl/extractor/francetv.py b/youtube_dl/extractor/francetv.py index f2b12c884..b8fe82e47 100644 --- a/youtube_dl/extractor/francetv.py +++ b/youtube_dl/extractor/francetv.py @@ -34,17 +34,7 @@ class PluzzIE(FranceTVBaseInfoExtractor): IE_NAME = u'pluzz.francetv.fr' _VALID_URL = r'https?://pluzz\.francetv\.fr/videos/(.*?)\.html' - _TEST = { - u'url': u'http://pluzz.francetv.fr/videos/allo_rufo_saison5_,88439064.html', - u'file': u'88439064.mp4', - u'info_dict': { - u'title': u'Allô Rufo', - u'description': u'md5:d909f1ebdf963814b65772aea250400e', - }, - u'params': { - u'skip_download': True, - }, - } + # Can't use tests, videos expire in 7 days def _real_extract(self, url): title = re.match(self._VALID_URL, url).group(1) diff --git a/youtube_dl/extractor/googleplus.py b/youtube_dl/extractor/googleplus.py index f1cd88983..8895ad289 100644 --- a/youtube_dl/extractor/googleplus.py +++ b/youtube_dl/extractor/googleplus.py @@ -40,7 +40,8 @@ class GooglePlusIE(InfoExtractor): self.report_extraction(video_id) # Extract update date - upload_date = self._html_search_regex('title="Timestamp">(.*?)', + upload_date = self._html_search_regex( + ['title="Timestamp">(.*?)', r'(.+?)'], webpage, u'upload date', fatal=False) if upload_date: # Convert timestring to a format suitable for filename diff --git a/youtube_dl/extractor/hotnewhiphop.py b/youtube_dl/extractor/hotnewhiphop.py index ccca1d7e0..3798118a7 100644 --- a/youtube_dl/extractor/hotnewhiphop.py +++ b/youtube_dl/extractor/hotnewhiphop.py @@ -7,11 +7,11 @@ from .common import InfoExtractor class HotNewHipHopIE(InfoExtractor): _VALID_URL = r'http://www\.hotnewhiphop.com/.*\.(?P.*)\.html' _TEST = { - u'url': u"http://www.hotnewhiphop.com/freddie-gibbs-lay-it-down-song.1435540.html'", + u'url': u"http://www.hotnewhiphop.com/freddie-gibbs-lay-it-down-song.1435540.html", u'file': u'1435540.mp3', u'md5': u'2c2cd2f76ef11a9b3b581e8b232f3d96', u'info_dict': { - u"title": u"Freddie Gibbs Songs - Lay It Down" + u"title": u"Freddie Gibbs - Lay It Down" } } diff --git a/youtube_dl/extractor/mixcloud.py b/youtube_dl/extractor/mixcloud.py index 8245b5583..a200dcd74 100644 --- a/youtube_dl/extractor/mixcloud.py +++ b/youtube_dl/extractor/mixcloud.py @@ -5,34 +5,27 @@ import socket from .common import InfoExtractor from ..utils import ( compat_http_client, - compat_str, compat_urllib_error, compat_urllib_request, - - ExtractorError, + unified_strdate, ) class MixcloudIE(InfoExtractor): - _WORKING = False # New API, but it seems good http://www.mixcloud.com/developers/documentation/ _VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/([\w\d-]+)/([\w\d-]+)' IE_NAME = u'mixcloud' - def report_download_json(self, file_id): - """Report JSON download.""" - self.to_screen(u'Downloading json') - - def get_urls(self, jsonData, fmt, bitrate='best'): - """Get urls from 'audio_formats' section in json""" - try: - bitrate_list = jsonData[fmt] - if bitrate is None or bitrate == 'best' or bitrate not in bitrate_list: - bitrate = max(bitrate_list) # select highest - - url_list = jsonData[fmt][bitrate] - except TypeError: # we have no bitrate info. - url_list = jsonData[fmt] - return url_list + _TEST = { + u'url': u'http://www.mixcloud.com/dholbach/cryptkeeper/', + u'file': u'dholbach-cryptkeeper.mp3', + u'info_dict': { + u'title': u'Cryptkeeper', + u'description': u'After quite a long silence from myself, finally another Drum\'n\'Bass mix with my favourite current dance floor bangers.', + u'uploader': u'Daniel Holbach', + u'uploader_id': u'dholbach', + u'upload_date': u'20111115', + }, + } def check_urls(self, url_list): """Returns 1st active url from list""" @@ -45,71 +38,32 @@ class MixcloudIE(InfoExtractor): return None - def _print_formats(self, formats): - print('Available formats:') - for fmt in formats.keys(): - for b in formats[fmt]: - try: - ext = formats[fmt][b][0] - print('%s\t%s\t[%s]' % (fmt, b, ext.split('.')[-1])) - except TypeError: # we have no bitrate info - ext = formats[fmt][0] - print('%s\t%s\t[%s]' % (fmt, '??', ext.split('.')[-1])) - break - def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - if mobj is None: - raise ExtractorError(u'Invalid URL: %s' % url) - # extract uploader & filename from url - uploader = mobj.group(1).decode('utf-8') - file_id = uploader + "-" + mobj.group(2).decode('utf-8') - - # construct API request - file_url = 'http://www.mixcloud.com/api/1/cloudcast/' + '/'.join(url.split('/')[-3:-1]) + '.json' - # retrieve .json file with links to files - request = compat_urllib_request.Request(file_url) - try: - self.report_download_json(file_url) - jsonData = compat_urllib_request.urlopen(request).read() - except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: - raise ExtractorError(u'Unable to retrieve file: %s' % compat_str(err)) - - # parse JSON - json_data = json.loads(jsonData) - player_url = json_data['player_swf_url'] - formats = dict(json_data['audio_formats']) - - req_format = self._downloader.params.get('format', None) - - if self._downloader.params.get('listformats', None): - self._print_formats(formats) - return - - if req_format is None or req_format == 'best': - for format_param in formats.keys(): - url_list = self.get_urls(formats, format_param) - # check urls - file_url = self.check_urls(url_list) - if file_url is not None: - break # got it! - else: - if req_format not in formats: - raise ExtractorError(u'Format is not available') - - url_list = self.get_urls(formats, req_format) - file_url = self.check_urls(url_list) - format_param = req_format - return [{ - 'id': file_id.decode('utf-8'), - 'url': file_url.decode('utf-8'), - 'uploader': uploader.decode('utf-8'), - 'upload_date': None, - 'title': json_data['name'], - 'ext': file_url.split('.')[-1].decode('utf-8'), - 'format': (format_param is None and u'NA' or format_param.decode('utf-8')), - 'thumbnail': json_data['thumbnail_url'], - 'description': json_data['description'], - 'player_url': player_url.decode('utf-8'), - }] + uploader = mobj.group(1) + cloudcast_name = mobj.group(2) + track_id = '-'.join((uploader, cloudcast_name)) + api_url = 'http://api.mixcloud.com/%s/%s/' % (uploader, cloudcast_name) + webpage = self._download_webpage(url, track_id) + json_data = self._download_webpage(api_url, track_id, + u'Downloading cloudcast info') + info = json.loads(json_data) + + preview_url = self._search_regex(r'data-preview-url="(.+?)"', webpage, u'preview url') + song_url = preview_url.replace('/previews/', '/cloudcasts/originals/') + template_url = re.sub(r'(stream\d*)', 'stream%d', song_url) + final_song_url = self.check_urls(template_url % i for i in range(30)) + + return { + 'id': track_id, + 'title': info['name'], + 'url': final_song_url, + 'ext': 'mp3', + 'description': info['description'], + 'thumbnail': info['pictures'].get('extra_large'), + 'uploader': info['user']['name'], + 'uploader_id': info['user']['username'], + 'upload_date': unified_strdate(info['created_time']), + 'view_count': info['play_count'], + } diff --git a/youtube_dl/extractor/newgrounds.py b/youtube_dl/extractor/newgrounds.py new file mode 100644 index 000000000..2ef80bce0 --- /dev/null +++ b/youtube_dl/extractor/newgrounds.py @@ -0,0 +1,38 @@ +import json +import re + +from .common import InfoExtractor +from ..utils import determine_ext + + +class NewgroundsIE(InfoExtractor): + _VALID_URL = r'(?:https?://)?(?:www\.)?newgrounds\.com/audio/listen/(?P\d+)' + _TEST = { + u'url': u'http://www.newgrounds.com/audio/listen/549479', + u'file': u'549479.mp3', + u'md5': u'fe6033d297591288fa1c1f780386f07a', + u'info_dict': { + u"title": u"B7 - BusMode", + u"uploader": u"Burn7", + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + music_id = mobj.group('id') + webpage = self._download_webpage(url, music_id) + + title = self._html_search_regex(r',"name":"([^"]+)",', webpage, u'music title') + uploader = self._html_search_regex(r',"artist":"([^"]+)",', webpage, u'music uploader') + + music_url_json_string = self._html_search_regex(r'({"url":"[^"]+"),', webpage, u'music url') + '}' + music_url_json = json.loads(music_url_json_string) + music_url = music_url_json['url'] + + return { + 'id': music_id, + 'title': title, + 'url': music_url, + 'uploader': uploader, + 'ext': determine_ext(music_url), + } diff --git a/youtube_dl/extractor/ooyala.py b/youtube_dl/extractor/ooyala.py index b734722d0..1f7b4d2e7 100644 --- a/youtube_dl/extractor/ooyala.py +++ b/youtube_dl/extractor/ooyala.py @@ -18,11 +18,15 @@ class OoyalaIE(InfoExtractor): }, } + @staticmethod + def _url_for_embed_code(embed_code): + return 'http://player.ooyala.com/player.js?embedCode=%s' % embed_code + def _extract_result(self, info, more_info): return {'id': info['embedCode'], 'ext': 'mp4', 'title': unescapeHTML(info['title']), - 'url': info['url'], + 'url': info.get('ipad_url') or info['url'], 'description': unescapeHTML(more_info['description']), 'thumbnail': more_info['promo'], } @@ -35,7 +39,9 @@ class OoyalaIE(InfoExtractor): mobile_url = self._search_regex(r'mobile_player_url="(.+?)&device="', player, u'mobile player url') mobile_player = self._download_webpage(mobile_url, embedCode) - videos_info = self._search_regex(r'eval\("\((\[{.*?stream_redirect.*?}\])\)"\);', mobile_player, u'info').replace('\\"','"') + videos_info = self._search_regex( + r'var streams=window.oo_testEnv\?\[\]:eval\("\((\[{.*?}\])\)"\);', + mobile_player, u'info').replace('\\"','"') videos_more_info = self._search_regex(r'eval\("\(({.*?\\"promo\\".*?})\)"', mobile_player, u'more info').replace('\\"','"') videos_info = json.loads(videos_info) videos_more_info =json.loads(videos_more_info) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 5f3a5540d..29cd5617c 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -1,10 +1,12 @@ import json import re +import itertools from .common import InfoExtractor from ..utils import ( compat_str, compat_urlparse, + compat_urllib_parse, ExtractorError, unified_strdate, @@ -53,10 +55,11 @@ class SoundcloudIE(InfoExtractor): def _resolv_url(cls, url): return 'http://api.soundcloud.com/resolve.json?url=' + url + '&client_id=' + cls._CLIENT_ID - def _extract_info_dict(self, info, full_title=None): + def _extract_info_dict(self, info, full_title=None, quiet=False): video_id = info['id'] name = full_title or video_id - self.report_extraction(name) + if quiet == False: + self.report_extraction(name) thumbnail = info['artwork_url'] if thumbnail is not None: @@ -198,3 +201,41 @@ class SoundcloudSetIE(SoundcloudIE): 'id': info['id'], 'title': info['title'], } + + +class SoundcloudUserIE(SoundcloudIE): + _VALID_URL = r'https?://(www\.)?soundcloud.com/(?P[^/]+)(/?(tracks/)?)?(\?.*)?$' + IE_NAME = u'soundcloud:user' + + # it's in tests/test_playlists.py + _TEST = None + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + uploader = mobj.group('user') + + url = 'http://soundcloud.com/%s/' % uploader + resolv_url = self._resolv_url(url) + user_json = self._download_webpage(resolv_url, uploader, + u'Downloading user info') + user = json.loads(user_json) + + tracks = [] + for i in itertools.count(): + data = compat_urllib_parse.urlencode({'offset': i*50, + 'client_id': self._CLIENT_ID, + }) + tracks_url = 'http://api.soundcloud.com/users/%s/tracks.json?' % user['id'] + data + response = self._download_webpage(tracks_url, uploader, + u'Downloading tracks page %s' % (i+1)) + new_tracks = json.loads(response) + tracks.extend(self._extract_info_dict(track, quiet=True) for track in new_tracks) + if len(new_tracks) < 50: + break + + return { + '_type': 'playlist', + 'id': compat_str(user['id']), + 'title': user['username'], + 'entries': tracks, + } diff --git a/youtube_dl/extractor/southparkstudios.py b/youtube_dl/extractor/southparkstudios.py new file mode 100644 index 000000000..a5dc754dd --- /dev/null +++ b/youtube_dl/extractor/southparkstudios.py @@ -0,0 +1,34 @@ +import re + +from .mtv import MTVIE, _media_xml_tag + + +class SouthParkStudiosIE(MTVIE): + IE_NAME = u'southparkstudios.com' + _VALID_URL = r'https?://www\.southparkstudios\.com/clips/(?P\d+)' + + _FEED_URL = 'http://www.southparkstudios.com/feeds/video-player/mrss' + + _TEST = { + u'url': u'http://www.southparkstudios.com/clips/104437/bat-daded#tab=featured', + u'file': u'a7bff6c2-ed00-11e0-aca6-0026b9414f30.mp4', + u'info_dict': { + u'title': u'Bat Daded', + u'description': u'Randy disqualifies South Park by getting into a fight with Bat Dad.', + }, + } + + # Overwrite MTVIE properties we don't want + _TESTS = [] + + def _get_thumbnail_url(self, uri, itemdoc): + search_path = '%s/%s' % (_media_xml_tag('group'), _media_xml_tag('thumbnail')) + return itemdoc.find(search_path).attrib['url'] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + webpage = self._download_webpage(url, video_id) + mgid = self._search_regex(r'swfobject.embedSWF\(".*?(mgid:.*?)"', + webpage, u'mgid') + return self._get_videos_info(mgid) diff --git a/youtube_dl/extractor/subtitles.py b/youtube_dl/extractor/subtitles.py index 97215f289..90de7de3a 100644 --- a/youtube_dl/extractor/subtitles.py +++ b/youtube_dl/extractor/subtitles.py @@ -10,8 +10,7 @@ class SubtitlesInfoExtractor(InfoExtractor): @property def _have_to_download_any_subtitles(self): return any([self._downloader.params.get('writesubtitles', False), - self._downloader.params.get('writeautomaticsub'), - self._downloader.params.get('allsubtitles', False)]) + self._downloader.params.get('writeautomaticsub')]) def _list_available_subtitles(self, video_id, webpage=None): """ outputs the available subtitles for the video """ @@ -34,7 +33,7 @@ class SubtitlesInfoExtractor(InfoExtractor): available_subs_list = {} if self._downloader.params.get('writeautomaticsub', False): available_subs_list.update(self._get_available_automatic_caption(video_id, video_webpage)) - if self._downloader.params.get('writesubtitles', False) or self._downloader.params.get('allsubtitles', False): + if self._downloader.params.get('writesubtitles', False): available_subs_list.update(self._get_available_subtitles(video_id)) if not available_subs_list: # error, it didn't get the available subtitles diff --git a/youtube_dl/extractor/trilulilu.py b/youtube_dl/extractor/trilulilu.py index f278951ba..0bf028f61 100644 --- a/youtube_dl/extractor/trilulilu.py +++ b/youtube_dl/extractor/trilulilu.py @@ -52,6 +52,7 @@ class TriluliluIE(InfoExtractor): { 'format': fnode.text, 'url': video_url_template % fnode.text, + 'ext': fnode.text.partition('-')[0] } for fnode in format_doc.findall('./formats/format') @@ -67,7 +68,6 @@ class TriluliluIE(InfoExtractor): } # TODO: Remove when #980 has been merged - info['url'] = formats[-1]['url'] - info['ext'] = formats[-1]['format'].partition('-')[0] + info.update(formats[-1]) return info diff --git a/youtube_dl/extractor/vice.py b/youtube_dl/extractor/vice.py new file mode 100644 index 000000000..6b93afa50 --- /dev/null +++ b/youtube_dl/extractor/vice.py @@ -0,0 +1,38 @@ +import re + +from .common import InfoExtractor +from .ooyala import OoyalaIE +from ..utils import ExtractorError + + +class ViceIE(InfoExtractor): + _VALID_URL = r'http://www.vice.com/.*?/(?P.+)' + + _TEST = { + u'url': u'http://www.vice.com/Fringes/cowboy-capitalists-part-1', + u'file': u'43cW1mYzpia9IlestBjVpd23Yu3afAfp.mp4', + u'info_dict': { + u'title': u'VICE_COWBOYCAPITALISTS_PART01_v1_VICE_WM_1080p.mov', + }, + u'params': { + # Requires ffmpeg (m3u8 manifest) + u'skip_download': True, + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + name = mobj.group('name') + webpage = self._download_webpage(url, name) + try: + ooyala_url = self._og_search_video_url(webpage) + except ExtractorError: + try: + embed_code = self._search_regex( + r'OO.Player.create\(\'ooyalaplayer\', \'(.+?)\'', webpage, + u'ooyala embed code') + ooyala_url = OoyalaIE._url_for_embed_code(embed_code) + except ExtractorError: + raise ExtractorError(u'The page doesn\'t contain a video', expected=True) + return self.url_result(ooyala_url, ie='Ooyala') + diff --git a/youtube_dl/extractor/xhamster.py b/youtube_dl/extractor/xhamster.py index 88b8b6be0..361619694 100644 --- a/youtube_dl/extractor/xhamster.py +++ b/youtube_dl/extractor/xhamster.py @@ -11,8 +11,8 @@ from ..utils import ( class XHamsterIE(InfoExtractor): """Information Extractor for xHamster""" - _VALID_URL = r'(?:http://)?(?:www.)?xhamster\.com/movies/(?P[0-9]+)/.*\.html' - _TEST = { + _VALID_URL = r'(?:http://)?(?:www\.)?xhamster\.com/movies/(?P[0-9]+)/(?P.+?)\.html(?:\?.*)?' + _TESTS = [{ u'url': u'http://xhamster.com/movies/1509445/femaleagent_shy_beauty_takes_the_bait.html', u'file': u'1509445.flv', u'md5': u'9f48e0e8d58e3076bb236ff412ab62fa', @@ -21,13 +21,24 @@ class XHamsterIE(InfoExtractor): u"uploader_id": u"Ruseful2011", u"title": u"FemaleAgent Shy beauty takes the bait" } - } + }, + { + u'url': u'http://xhamster.com/movies/2221348/britney_spears_sexy_booty.html?hd', + u'file': u'2221348.flv', + u'md5': u'e767b9475de189320f691f49c679c4c7', + u'info_dict': { + u"upload_date": u"20130914", + u"uploader_id": u"jojo747400", + u"title": u"Britney Spears Sexy Booty" + } + }] def _real_extract(self,url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') - mrss_url = 'http://xhamster.com/movies/%s/.html' % video_id + seo = mobj.group('seo') + mrss_url = 'http://xhamster.com/movies/%s/%s.html?hd' % (video_id, seo) webpage = self._download_webpage(mrss_url, video_id) mobj = re.search(r'\'srv\': \'(?P[^\']*)\',\s*\'file\': \'(?P[^\']+)\',', webpage) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index f49665925..23a8097c5 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -139,7 +139,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): ( (?:https?://)? # http(s):// (optional) (?:(?:(?:(?:\w+\.)?youtube(?:-nocookie)?\.com/| - tube\.majestyc\.net/) # the various hostnames, with wildcard subdomains + tube\.majestyc\.net/| + youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains (?:.*?\#/)? # handle anchor (#/) redirect urls (?: # the various things that can precede the ID: (?:(?:v|embed|e)/) # v/ or embed/ or e/ @@ -428,7 +429,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): elif len(s) == 86: return s[5:34] + s[0] + s[35:38] + s[3] + s[39:45] + s[38] + s[46:53] + s[73] + s[54:73] + s[85] + s[74:85] + s[53] elif len(s) == 85: - return s[40] + s[82:43:-1] + s[22] + s[42:40:-1] + s[83] + s[39:22:-1] + s[0] + s[21:2:-1] + return s[3:11] + s[0] + s[12:55] + s[84] + s[56:84] elif len(s) == 84: return s[81:36:-1] + s[0] + s[35:2:-1] elif len(s) == 83: @@ -782,10 +783,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): if self._downloader.params.get('verbose'): s = url_data['s'][0] if age_gate: - player_version = self._search_regex(r'ad3-(.+?)\.swf', - video_info['ad3_module'][0] if 'ad3_module' in video_info else 'NOT FOUND', - 'flash player', fatal=False) - player = 'flash player %s' % player_version + player = 'flash player' else: player = u'html5 player %s' % self._search_regex(r'html5player-(.+?)\.js', video_webpage, 'html5 player', fatal=False) @@ -1007,6 +1005,9 @@ class YoutubeUserIE(InfoExtractor): response = json.loads(page) except ValueError as err: raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err)) + if 'entry' not in response['feed']: + # Number of videos is a multiple of self._MAX_RESULTS + break # Extract video identifiers ids_in_page = [] diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 768c6207d..814a9b6be 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -700,7 +700,16 @@ def unified_strdate(date_str): date_str = date_str.replace(',',' ') # %z (UTC offset) is only supported in python>=3.2 date_str = re.sub(r' (\+|-)[\d]*$', '', date_str) - format_expressions = ['%d %B %Y', '%B %d %Y', '%b %d %Y', '%Y-%m-%d', '%d/%m/%Y', '%Y/%m/%d %H:%M:%S', '%d.%m.%Y %H:%M'] + format_expressions = [ + '%d %B %Y', + '%B %d %Y', + '%b %d %Y', + '%Y-%m-%d', + '%d/%m/%Y', + '%Y/%m/%d %H:%M:%S', + '%d.%m.%Y %H:%M', + '%Y-%m-%dT%H:%M:%SZ', + ] for expression in format_expressions: try: upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d') @@ -781,6 +790,18 @@ def platform_name(): return res +def write_string(s, out=None): + if out is None: + out = sys.stderr + assert type(s) == type(u'') + + if ('b' in getattr(out, 'mode', '') or + sys.version_info[0] < 3): # Python 2 lies about mode of sys.stderr + s = s.encode(preferredencoding(), 'ignore') + out.write(s) + out.flush() + + def bytes_to_intlist(bs): if not bs: return [] diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 3b2505c77..80ccfbd4f 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2013.09.12' +__version__ = '2013.09.17'