From: Philipp Hagemeister Date: Sat, 10 Jan 2015 01:03:46 +0000 (+0100) Subject: Merge remote-tracking branch 'xavierbeynon/master' X-Git-Url: http://git.bitcoin.ninja/index.cgi?p=youtube-dl;a=commitdiff_plain;h=51897bb77c504ad206abbef5ae7504fcd082b5b0;hp=ff0813313ac33b6abd03e9322b706dc83c5aeb14 Merge remote-tracking branch 'xavierbeynon/master' --- diff --git a/.gitignore b/.gitignore index 86312d4e4..0422adf44 100644 --- a/.gitignore +++ b/.gitignore @@ -31,3 +31,5 @@ updates_key.pem test/testdata .tox youtube-dl.zsh +.idea +.idea/* \ No newline at end of file diff --git a/.travis.yml b/.travis.yml index c6cc7a994..f14014414 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,7 +9,6 @@ notifications: email: - filippo.valsorda@gmail.com - phihag@phihag.de - - jaime.marquinez.ferrandiz+travis@gmail.com - yasoob.khld@gmail.com # irc: # channels: diff --git a/AUTHORS b/AUTHORS index 29ce9e3e4..8f2010803 100644 --- a/AUTHORS +++ b/AUTHORS @@ -97,3 +97,7 @@ Petr Kutalek Will Glynn Max Reimann Cédric Luthi +Thijs Vermeir +Joel Leclerc +Christopher Krooss +Ondřej Caletka diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 0ff7b395a..7917abfc6 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -44,7 +44,7 @@ In particular, every site support request issue should only pertain to services ### Is anyone going to need the feature? -Only post features that you (or an incapicated friend you can personally talk to) require. Do not post features because they seem like a good idea. If they are really useful, they will be requested by someone who requires them. +Only post features that you (or an incapacitated friend you can personally talk to) require. Do not post features because they seem like a good idea. If they are really useful, they will be requested by someone who requires them. ### Is your question about youtube-dl? diff --git a/Makefile b/Makefile index 71470eedb..578079879 100644 --- a/Makefile +++ b/Makefile @@ -46,7 +46,7 @@ test: ot: offlinetest offlinetest: codetest - nosetests --verbose test --exclude test_download --exclude test_age_restriction --exclude test_subtitles --exclude test_write_annotations + nosetests --verbose test --exclude test_download --exclude test_age_restriction --exclude test_subtitles --exclude test_write_annotations --exclude test_youtube_lists tar: youtube-dl.tar.gz @@ -63,7 +63,7 @@ youtube-dl: youtube_dl/*.py youtube_dl/*/*.py chmod a+x youtube-dl README.md: youtube_dl/*.py youtube_dl/*/*.py - COLUMNS=80 python -m youtube_dl --help | python devscripts/make_readme.py + COLUMNS=80 python youtube_dl/__main__.py --help | python devscripts/make_readme.py CONTRIBUTING.md: README.md python devscripts/make_contributing.py README.md CONTRIBUTING.md diff --git a/README.md b/README.md index 915bcd0cd..3d632a553 100644 --- a/README.md +++ b/README.md @@ -219,6 +219,8 @@ which means you can modify it, redistribute it or use it however you like. for each command-line argument. If the URL refers to a playlist, dump the whole playlist information in a single line. + --print-json Be quiet and print the video information as + JSON (video is still being downloaded). --newline output progress bar as new lines --no-progress do not print progress bar --console-title display progress in console titlebar @@ -248,14 +250,15 @@ which means you can modify it, redistribute it or use it however you like. ## Video Format Options: -f, --format FORMAT video format code, specify the order of - preference using slashes: -f 22/17/18 . -f - mp4 , -f m4a and -f flv are also - supported. You can also use the special - names "best", "bestvideo", "bestaudio", - "worst", "worstvideo" and "worstaudio". By - default, youtube-dl will pick the best - quality. Use commas to download multiple - audio formats, such as -f + preference using slashes, as in -f 22/17/18 + . Instead of format codes, you can select + by extension for the extensions aac, m4a, + mp3, mp4, ogg, wav, webm. You can also use + the special names "best", "bestvideo", + "bestaudio", "worst". By default, youtube- + dl will pick the best quality. Use commas + to download multiple audio formats, such as + -f 136/137/mp4/bestvideo,140/m4a/bestaudio. You can merge the video and audio of two formats into a single file using -f \youtube-dl.conf`. +You can configure youtube-dl by placing default arguments (such as `--extract-audio --no-mtime` to always extract the audio and not copy the mtime) into `/etc/youtube-dl.conf` and/or `~/.config/youtube-dl/config`. On Windows, the configuration file locations are `%APPDATA%\youtube-dl\config.txt` and `C:\Users\\youtube-dl.conf`. # OUTPUT TEMPLATE @@ -449,6 +452,14 @@ Since June 2012 (#342) youtube-dl is packed as an executable zipfile, simply unz To run the exe you need to install first the [Microsoft Visual C++ 2008 Redistributable Package](http://www.microsoft.com/en-us/download/details.aspx?id=29). +### On Windows, how should I set up ffmpeg and youtube-dl? Where should I put the exe files? + +If you put youtube-dl and ffmpeg in the same directory that you're running the command from, it will work, but that's rather cumbersome. + +To make a different directory work - either for ffmpeg, or for youtube-dl, or for both - simply create the directory (say, `C:\bin`, or `C:\Users\\bin`), put all the executables directly in there, and then [set your PATH environment variable](https://www.java.com/en/download/help/path.xml) to include that directory. + +From then on, after restarting your shell, you will be able to access both youtube-dl and ffmpeg (and youtube-dl will be able to find ffmpeg) by simply typing `youtube-dl` or `ffmpeg`, no matter what directory you're in. + ### How can I detect whether a given URL is supported by youtube-dl? For one, have a look at the [list of supported sites](docs/supportedsites). Note that it can sometimes happen that the site changes its URL scheme (say, from http://example.com/v/1234567 to http://example.com/v/1234567 ) and youtube-dl reports an URL of a service in that list as unsupported. In that case, simply report a bug. diff --git a/devscripts/gh-pages/update-sites.py b/devscripts/gh-pages/update-sites.py index f0f0481c7..d3ef5f0b5 100755 --- a/devscripts/gh-pages/update-sites.py +++ b/devscripts/gh-pages/update-sites.py @@ -16,7 +16,7 @@ def main(): template = tmplf.read() ie_htmls = [] - for ie in sorted(youtube_dl.gen_extractors(), key=lambda i: i.IE_NAME.lower()): + for ie in youtube_dl.list_extractors(age_limit=None): ie_html = '{}'.format(ie.IE_NAME) ie_desc = getattr(ie, 'IE_DESC', None) if ie_desc is False: diff --git a/devscripts/make_supportedsites.py b/devscripts/make_supportedsites.py index 140010644..3df4385a6 100644 --- a/devscripts/make_supportedsites.py +++ b/devscripts/make_supportedsites.py @@ -23,12 +23,12 @@ def main(): def gen_ies_md(ies): for ie in ies: - ie_md = '**{}**'.format(ie.IE_NAME) + ie_md = '**{0}**'.format(ie.IE_NAME) ie_desc = getattr(ie, 'IE_DESC', None) if ie_desc is False: continue if ie_desc is not None: - ie_md += ': {}'.format(ie.IE_DESC) + ie_md += ': {0}'.format(ie.IE_DESC) if not ie.working(): ie_md += ' (Currently broken)' yield ie_md diff --git a/test/helper.py b/test/helper.py index 96d58b7c1..c416f388c 100644 --- a/test/helper.py +++ b/test/helper.py @@ -82,18 +82,8 @@ class FakeYDL(YoutubeDL): def gettestcases(include_onlymatching=False): for ie in youtube_dl.extractor.gen_extractors(): - t = getattr(ie, '_TEST', None) - if t: - assert not hasattr(ie, '_TESTS'), \ - '%s has _TEST and _TESTS' % type(ie).__name__ - tests = [t] - else: - tests = getattr(ie, '_TESTS', []) - for t in tests: - if not include_onlymatching and t.get('only_matching', False): - continue - t['name'] = type(ie).__name__[:-len('IE')] - yield t + for tc in ie.get_testcases(include_onlymatching): + yield tc md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest() @@ -120,6 +110,20 @@ def expect_info_dict(self, got_dict, expected_dict): else: if isinstance(expected, compat_str) and expected.startswith('md5:'): got = 'md5:' + md5(got_dict.get(info_field)) + elif isinstance(expected, compat_str) and expected.startswith('mincount:'): + got = got_dict.get(info_field) + self.assertTrue( + isinstance(got, list), + 'Expected field %s to be a list, but it is of type %s' % ( + info_field, type(got).__name__)) + expected_num = int(expected.partition(':')[2]) + assertGreaterEqual( + self, len(got), expected_num, + 'Expected %d items in field %s, but only got %d' % ( + expected_num, info_field, len(got) + ) + ) + continue else: got = got_dict.get(info_field) self.assertEqual(expected, got, diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py index 13c18ed95..be8d12997 100644 --- a/test/test_InfoExtractor.py +++ b/test/test_InfoExtractor.py @@ -40,5 +40,23 @@ class TestInfoExtractor(unittest.TestCase): self.assertEqual(ie._og_search_description(html), 'Some video\'s description ') self.assertEqual(ie._og_search_thumbnail(html), 'http://domain.com/pic.jpg?key1=val1&key2=val2') + def test_html_search_meta(self): + ie = self.ie + html = ''' + + + + + + + ''' + + self.assertEqual(ie._html_search_meta('a', html), '1') + self.assertEqual(ie._html_search_meta('b', html), '2') + self.assertEqual(ie._html_search_meta('c', html), '3') + self.assertEqual(ie._html_search_meta('d', html), '4') + self.assertEqual(ie._html_search_meta('e', html), '5') + self.assertEqual(ie._html_search_meta('f', html), '6') + if __name__ == '__main__': unittest.main() diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index f8e4f930e..85d87f2c3 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -8,6 +8,8 @@ import sys import unittest sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +import copy + from test.helper import FakeYDL, assertRegexpMatches from youtube_dl import YoutubeDL from youtube_dl.extractor import YoutubeIE @@ -192,6 +194,37 @@ class TestFormatSelection(unittest.TestCase): downloaded = ydl.downloaded_info_dicts[0] self.assertEqual(downloaded['format_id'], 'vid-high') + def test_format_selection_audio_exts(self): + formats = [ + {'format_id': 'mp3-64', 'ext': 'mp3', 'abr': 64, 'url': 'http://_', 'vcodec': 'none'}, + {'format_id': 'ogg-64', 'ext': 'ogg', 'abr': 64, 'url': 'http://_', 'vcodec': 'none'}, + {'format_id': 'aac-64', 'ext': 'aac', 'abr': 64, 'url': 'http://_', 'vcodec': 'none'}, + {'format_id': 'mp3-32', 'ext': 'mp3', 'abr': 32, 'url': 'http://_', 'vcodec': 'none'}, + {'format_id': 'aac-32', 'ext': 'aac', 'abr': 32, 'url': 'http://_', 'vcodec': 'none'}, + ] + + info_dict = _make_result(formats) + ydl = YDL({'format': 'best'}) + ie = YoutubeIE(ydl) + ie._sort_formats(info_dict['formats']) + ydl.process_ie_result(copy.deepcopy(info_dict)) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], 'aac-64') + + ydl = YDL({'format': 'mp3'}) + ie = YoutubeIE(ydl) + ie._sort_formats(info_dict['formats']) + ydl.process_ie_result(copy.deepcopy(info_dict)) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], 'mp3-64') + + ydl = YDL({'prefer_free_formats': True}) + ie = YoutubeIE(ydl) + ie._sort_formats(info_dict['formats']) + ydl.process_ie_result(copy.deepcopy(info_dict)) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], 'ogg-64') + def test_format_selection_video(self): formats = [ {'format_id': 'dash-video-low', 'ext': 'mp4', 'preference': 1, 'acodec': 'none', 'url': '_'}, @@ -218,7 +251,7 @@ class TestFormatSelection(unittest.TestCase): # 3D '85', '84', '102', '83', '101', '82', '100', # Dash video - '138', '137', '248', '136', '247', '135', '246', + '137', '248', '136', '247', '135', '246', '245', '244', '134', '243', '133', '242', '160', # Dash audio '141', '172', '140', '171', '139', diff --git a/test/test_age_restriction.py b/test/test_age_restriction.py index 5be065c43..6f5513faa 100644 --- a/test/test_age_restriction.py +++ b/test/test_age_restriction.py @@ -45,11 +45,6 @@ class TestAgeRestriction(unittest.TestCase): 'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/', '505835.mp4', 2, old_age=25) - def test_pornotube(self): - self._assert_restricted( - 'http://pornotube.com/c/173/m/1689755/Marilyn-Monroe-Bathing', - '1689755.flv', 13) - if __name__ == '__main__': unittest.main() diff --git a/test/test_subtitles.py b/test/test_subtitles.py index d34565191..6336dd317 100644 --- a/test/test_subtitles.py +++ b/test/test_subtitles.py @@ -17,6 +17,7 @@ from youtube_dl.extractor import ( TEDIE, VimeoIE, WallaIE, + CeskaTelevizeIE, ) @@ -317,5 +318,32 @@ class TestWallaSubtitles(BaseTestSubtitles): self.assertEqual(len(subtitles), 0) +class TestCeskaTelevizeSubtitles(BaseTestSubtitles): + url = 'http://www.ceskatelevize.cz/ivysilani/10600540290-u6-uzasny-svet-techniky' + IE = CeskaTelevizeIE + + def test_list_subtitles(self): + self.DL.expect_warning('Automatic Captions not supported by this server') + self.DL.params['listsubtitles'] = True + info_dict = self.getInfoDict() + self.assertEqual(info_dict, None) + + def test_allsubtitles(self): + self.DL.expect_warning('Automatic Captions not supported by this server') + self.DL.params['writesubtitles'] = True + self.DL.params['allsubtitles'] = True + subtitles = self.getSubtitles() + self.assertEqual(set(subtitles.keys()), set(['cs'])) + self.assertEqual(md5(subtitles['cs']), '9bf52d9549533c32c427e264bf0847d4') + + def test_nosubtitles(self): + self.DL.expect_warning('video doesn\'t have subtitles') + self.url = 'http://www.ceskatelevize.cz/ivysilani/ivysilani/10441294653-hyde-park-civilizace/214411058091220' + self.DL.params['writesubtitles'] = True + self.DL.params['allsubtitles'] = True + subtitles = self.getSubtitles() + self.assertEqual(len(subtitles), 0) + + if __name__ == '__main__': unittest.main() diff --git a/test/test_utils.py b/test/test_utils.py index dd49a6d17..16e1a1ddf 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -16,6 +16,7 @@ import json import xml.etree.ElementTree from youtube_dl.utils import ( + age_restricted, args_to_str, clean_html, DateRange, @@ -402,5 +403,12 @@ Trying to open render node... Success at /dev/dri/renderD128. ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4') + def test_age_restricted(self): + self.assertFalse(age_restricted(None, 10)) # unrestricted content + self.assertFalse(age_restricted(1, None)) # unrestricted policy + self.assertFalse(age_restricted(8, 10)) + self.assertTrue(age_restricted(18, 14)) + self.assertFalse(age_restricted(18, 18)) + if __name__ == '__main__': unittest.main() diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index e2b823f66..61675d8ec 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -63,6 +63,7 @@ from .utils import ( YoutubeDLHandler, prepend_extension, args_to_str, + age_restricted, ) from .cache import Cache from .extractor import get_info_extractor, gen_extractors @@ -202,6 +203,7 @@ class YoutubeDL(object): Progress hooks are guaranteed to be called at least once (with status "finished") if the download is successful. + merge_output_format: Extension to use when merging formats. The following parameters are not used by YoutubeDL itself, they are used by @@ -550,13 +552,8 @@ class YoutubeDL(object): max_views = self.params.get('max_views') if max_views is not None and view_count > max_views: return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views) - age_limit = self.params.get('age_limit') - if age_limit is not None: - actual_age_limit = info_dict.get('age_limit') - if actual_age_limit is None: - actual_age_limit = 0 - if age_limit < actual_age_limit: - return 'Skipping "' + title + '" because it is age restricted' + if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')): + return 'Skipping "%s" because it is age restricted' % title if self.in_download_archive(info_dict): return '%s has already been recorded in archive' % video_title return None @@ -790,7 +787,7 @@ class YoutubeDL(object): if video_formats: return video_formats[0] else: - extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a'] + extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav'] if format_spec in extensions: filter_f = lambda f: f['ext'] == format_spec else: @@ -913,10 +910,23 @@ class YoutubeDL(object): 'contain the video, try using ' '"-f %s+%s"' % (format_2, format_1)) return + output_ext = ( + formats_info[0]['ext'] + if self.params.get('merge_output_format') is None + else self.params['merge_output_format']) selected_format = { 'requested_formats': formats_info, 'format': rf, 'ext': formats_info[0]['ext'], + 'width': formats_info[0].get('width'), + 'height': formats_info[0].get('height'), + 'resolution': formats_info[0].get('resolution'), + 'fps': formats_info[0].get('fps'), + 'vcodec': formats_info[0].get('vcodec'), + 'vbr': formats_info[0].get('vbr'), + 'acodec': formats_info[1].get('acodec'), + 'abr': formats_info[1].get('abr'), + 'ext': output_ext, } else: selected_format = None @@ -1333,7 +1343,9 @@ class YoutubeDL(object): formats = info_dict.get('formats', [info_dict]) idlen = max(len('format code'), max(len(f['format_id']) for f in formats)) - formats_s = [line(f, idlen) for f in formats] + formats_s = [ + line(f, idlen) for f in formats + if f.get('preference') is None or f['preference'] >= -1000] if len(formats) > 1: formats_s[0] += (' ' if self._format_note(formats[0]) else '') + '(worst)' formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)' diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index e79320323..8e7b74466 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -38,7 +38,7 @@ from .update import update_self from .downloader import ( FileDownloader, ) -from .extractor import gen_extractors +from .extractor import gen_extractors, list_extractors from .YoutubeDL import YoutubeDL @@ -95,24 +95,22 @@ def _real_main(argv=None): _enc = preferredencoding() all_urls = [url.decode(_enc, 'ignore') if isinstance(url, bytes) else url for url in all_urls] - extractors = gen_extractors() - if opts.list_extractors: - for ie in sorted(extractors, key=lambda ie: ie.IE_NAME.lower()): + for ie in list_extractors(opts.age_limit): compat_print(ie.IE_NAME + (' (CURRENTLY BROKEN)' if not ie._WORKING else '')) matchedUrls = [url for url in all_urls if ie.suitable(url)] for mu in matchedUrls: compat_print(' ' + mu) sys.exit(0) if opts.list_extractor_descriptions: - for ie in sorted(extractors, key=lambda ie: ie.IE_NAME.lower()): + for ie in list_extractors(opts.age_limit): if not ie._WORKING: continue desc = getattr(ie, 'IE_DESC', ie.IE_NAME) if desc is False: continue if hasattr(ie, 'SEARCH_KEY'): - _SEARCHES = ('cute kittens', 'slithering pythons', 'falling cat', 'angry poodle', 'purple fish', 'running tortoise', 'sleeping bunny') + _SEARCHES = ('cute kittens', 'slithering pythons', 'falling cat', 'angry poodle', 'purple fish', 'running tortoise', 'sleeping bunny', 'burping cow') _COUNTS = ('', '5', '10', 'all') desc += ' (Example: "%s%s:%s" )' % (ie.SEARCH_KEY, random.choice(_COUNTS), random.choice(_SEARCHES)) compat_print(desc) @@ -168,6 +166,7 @@ def _real_main(argv=None): if opts.recodevideo is not None: if opts.recodevideo not in ['mp4', 'flv', 'webm', 'ogg', 'mkv']: parser.error('invalid video recode format specified') + if opts.date is not None: date = DateRange.day(opts.date) else: @@ -199,7 +198,8 @@ def _real_main(argv=None): ' file! Use "{0}.%(ext)s" instead of "{0}" as the output' ' template'.format(outtmpl)) - any_printing = opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat or opts.getduration or opts.dumpjson or opts.dump_single_json + any_getting = opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat or opts.getduration or opts.dumpjson or opts.dump_single_json + any_printing = opts.print_json download_archive_fn = compat_expanduser(opts.download_archive) if opts.download_archive is not None else opts.download_archive # PostProcessors @@ -245,7 +245,7 @@ def _real_main(argv=None): 'password': opts.password, 'twofactor': opts.twofactor, 'videopassword': opts.videopassword, - 'quiet': (opts.quiet or any_printing), + 'quiet': (opts.quiet or any_getting or any_printing), 'no_warnings': opts.no_warnings, 'forceurl': opts.geturl, 'forcetitle': opts.gettitle, @@ -255,9 +255,9 @@ def _real_main(argv=None): 'forceduration': opts.getduration, 'forcefilename': opts.getfilename, 'forceformat': opts.getformat, - 'forcejson': opts.dumpjson, + 'forcejson': opts.dumpjson or opts.print_json, 'dump_single_json': opts.dump_single_json, - 'simulate': opts.simulate or any_printing, + 'simulate': opts.simulate or any_getting, 'skip_download': opts.skip_download, 'format': opts.format, 'format_limit': opts.format_limit, @@ -324,6 +324,7 @@ def _real_main(argv=None): 'encoding': opts.encoding, 'exec_cmd': opts.exec_cmd, 'extract_flat': opts.extract_flat, + 'merge_output_format': opts.merge_output_format, 'postprocessors': postprocessors, } @@ -365,3 +366,5 @@ def main(argv=None): sys.exit('ERROR: fixed output name but more than one file to download') except KeyboardInterrupt: sys.exit('\nERROR: Interrupted by user') + +__all__ = ['main', 'YoutubeDL', 'gen_extractors', 'list_extractors'] diff --git a/youtube_dl/downloader/f4m.py b/youtube_dl/downloader/f4m.py index f9f6f3e73..c460c167a 100644 --- a/youtube_dl/downloader/f4m.py +++ b/youtube_dl/downloader/f4m.py @@ -187,24 +187,34 @@ def build_fragments_list(boot_info): return res -def write_flv_header(stream, metadata): - """Writes the FLV header and the metadata to stream""" +def write_unsigned_int(stream, val): + stream.write(struct_pack('!I', val)) + + +def write_unsigned_int_24(stream, val): + stream.write(struct_pack('!I', val)[1:]) + + +def write_flv_header(stream): + """Writes the FLV header to stream""" # FLV header stream.write(b'FLV\x01') stream.write(b'\x05') stream.write(b'\x00\x00\x00\x09') - # FLV File body stream.write(b'\x00\x00\x00\x00') - # FLVTAG - # Script data - stream.write(b'\x12') - # Size of the metadata with 3 bytes - stream.write(struct_pack('!L', len(metadata))[1:]) - stream.write(b'\x00\x00\x00\x00\x00\x00\x00') - stream.write(metadata) - # Magic numbers extracted from the output files produced by AdobeHDS.php - # (https://github.com/K-S-V/Scripts) - stream.write(b'\x00\x00\x01\x73') + + +def write_metadata_tag(stream, metadata): + """Writes optional metadata tag to stream""" + SCRIPT_TAG = b'\x12' + FLV_TAG_HEADER_LEN = 11 + + if metadata: + stream.write(SCRIPT_TAG) + write_unsigned_int_24(stream, len(metadata)) + stream.write(b'\x00\x00\x00\x00\x00\x00\x00') + stream.write(metadata) + write_unsigned_int(stream, FLV_TAG_HEADER_LEN + len(metadata)) def _add_ns(prop): @@ -256,7 +266,11 @@ class F4mFD(FileDownloader): bootstrap = self.ydl.urlopen(bootstrap_url).read() else: bootstrap = base64.b64decode(bootstrap_node.text) - metadata = base64.b64decode(media.find(_add_ns('metadata')).text) + metadata_node = media.find(_add_ns('metadata')) + if metadata_node is not None: + metadata = base64.b64decode(metadata_node.text) + else: + metadata = None boot_info = read_bootstrap_info(bootstrap) fragments_list = build_fragments_list(boot_info) @@ -269,7 +283,8 @@ class F4mFD(FileDownloader): tmpfilename = self.temp_name(filename) (dest_stream, tmpfilename) = sanitize_open(tmpfilename, 'wb') - write_flv_header(dest_stream, metadata) + write_flv_header(dest_stream) + write_metadata_tag(dest_stream, metadata) # This dict stores the download progress, it's updated by the progress # hook diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py index 5bb0f3cfd..aa58b52ab 100644 --- a/youtube_dl/downloader/hls.py +++ b/youtube_dl/downloader/hls.py @@ -11,7 +11,6 @@ from ..compat import ( compat_urllib_request, ) from ..utils import ( - check_executable, encodeFilename, ) @@ -27,16 +26,13 @@ class HlsFD(FileDownloader): '-bsf:a', 'aac_adtstoasc', encodeFilename(tmpfilename, for_subprocess=True)] - for program in ['avconv', 'ffmpeg']: - if check_executable(program, ['-version']): - break - else: + ffpp = FFmpegPostProcessor(downloader=self) + program = ffpp._executable + if program is None: self.report_error('m3u8 download detected but ffmpeg or avconv could not be found. Please install one.') return False - cmd = [program] + args - - ffpp = FFmpegPostProcessor(downloader=self) ffpp.check_version() + cmd = [program] + args retval = subprocess.call(cmd) if retval == 0: diff --git a/youtube_dl/downloader/mplayer.py b/youtube_dl/downloader/mplayer.py index c53195da0..72cef30ea 100644 --- a/youtube_dl/downloader/mplayer.py +++ b/youtube_dl/downloader/mplayer.py @@ -4,8 +4,8 @@ import os import subprocess from .common import FileDownloader -from ..compat import compat_subprocess_get_DEVNULL from ..utils import ( + check_executable, encodeFilename, ) @@ -20,11 +20,7 @@ class MplayerFD(FileDownloader): 'mplayer', '-really-quiet', '-vo', 'null', '-vc', 'dummy', '-dumpstream', '-dumpfile', tmpfilename, url] # Check for mplayer first - try: - subprocess.call( - ['mplayer', '-h'], - stdout=compat_subprocess_get_DEVNULL(), stderr=subprocess.STDOUT) - except (OSError, IOError): + if not check_executable('mplayer', ['-h']): self.report_error('MMS or RTSP download detected but "%s" could not be run' % args[0]) return False diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index e4c51f238..f544e87f1 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -71,6 +71,7 @@ from .cnn import ( from .collegehumor import CollegeHumorIE from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE from .comcarcoff import ComCarCoffIE +from .commonmistakes import CommonMistakesIE from .condenast import CondeNastIE from .cracked import CrackedIE from .criterion import CriterionIE @@ -158,6 +159,7 @@ from .gametrailers import GametrailersIE from .gdcvault import GDCVaultIE from .generic import GenericIE from .giantbomb import GiantBombIE +from .giga import GigaIE from .glide import GlideIE from .globo import GloboIE from .godtube import GodTubeIE @@ -272,6 +274,7 @@ from .nbc import ( ) from .ndr import NDRIE from .ndtv import NDTVIE +from .netzkino import NetzkinoIE from .nerdcubed import NerdCubedFeedIE from .newgrounds import NewgroundsIE from .newstube import NewstubeIE @@ -324,6 +327,7 @@ from .prosiebensat1 import ProSiebenSat1IE from .pyvideo import PyvideoIE from .quickvid import QuickVidIE from .radiode import RadioDeIE +from .radiobremen import RadioBremenIE from .radiofrance import RadioFranceIE from .rai import RaiIE from .rbmaradio import RBMARadioIE @@ -344,6 +348,7 @@ from .ruhd import RUHDIE from .rutube import ( RutubeIE, RutubeChannelIE, + RutubeEmbedIE, RutubeMovieIE, RutubePersonIE, ) @@ -473,6 +478,7 @@ from .videott import VideoTtIE from .videoweed import VideoWeedIE from .vidme import VidmeIE from .vidzi import VidziIE +from .vier import VierIE, VierVideosIE from .vimeo import ( VimeoIE, VimeoAlbumIE, @@ -508,6 +514,7 @@ from .wdr import ( WDRMobileIE, WDRMausIE, ) +from .webofstories import WebOfStoriesIE from .weibo import WeiboIE from .wimp import WimpIE from .wistia import WistiaIE @@ -543,7 +550,7 @@ from .youtube import ( YoutubeSearchURLIE, YoutubeShowIE, YoutubeSubscriptionsIE, - YoutubeTopListIE, + YoutubeTruncatedIDIE, YoutubeTruncatedURLIE, YoutubeUserIE, YoutubeWatchLaterIE, @@ -569,6 +576,17 @@ def gen_extractors(): return [klass() for klass in _ALL_CLASSES] +def list_extractors(age_limit): + """ + Return a list of extractors that are suitable for the given age, + sorted by extractor ID. + """ + + return sorted( + filter(lambda ie: ie.is_suitable(age_limit), gen_extractors()), + key=lambda ie: ie.IE_NAME.lower()) + + def get_info_extractor(ie_name): """Returns the info extractor class with the given ie_name""" return globals()[ie_name + 'IE'] diff --git a/youtube_dl/extractor/auengine.py b/youtube_dl/extractor/auengine.py index 014a21952..a1b666be0 100644 --- a/youtube_dl/extractor/auengine.py +++ b/youtube_dl/extractor/auengine.py @@ -7,6 +7,7 @@ from ..compat import compat_urllib_parse from ..utils import ( determine_ext, ExtractorError, + remove_end, ) @@ -27,23 +28,18 @@ class AUEngineIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - title = self._html_search_regex(r'(?P<title>.+?)', webpage, 'title') - title = title.strip() - links = re.findall(r'\s(?:file|url):\s*["\']([^\'"]+)["\']', webpage) - links = map(compat_urllib_parse.unquote, links) - - thumbnail = None - video_url = None - for link in links: - if link.endswith('.png'): - thumbnail = link - elif '/videos/' in link: - video_url = link + title = self._html_search_regex( + r'\s*(?P<title>.+?)\s*', webpage, 'title') + video_urls = re.findall(r'http://\w+.auengine.com/vod/.*[^\W]', webpage) + video_url = compat_urllib_parse.unquote(video_urls[0]) + thumbnails = re.findall(r'http://\w+.auengine.com/thumb/.*[^\W]', webpage) + thumbnail = compat_urllib_parse.unquote(thumbnails[0]) + if not video_url: raise ExtractorError('Could not find video URL') + ext = '.' + determine_ext(video_url) - if ext == title[-len(ext):]: - title = title[:-len(ext)] + title = remove_end(title, ext) return { 'id': video_id, diff --git a/youtube_dl/extractor/bbccouk.py b/youtube_dl/extractor/bbccouk.py index f690dc803..1cf48fe0d 100644 --- a/youtube_dl/extractor/bbccouk.py +++ b/youtube_dl/extractor/bbccouk.py @@ -10,7 +10,7 @@ from ..compat import compat_HTTPError class BBCCoUkIE(SubtitlesInfoExtractor): IE_NAME = 'bbc.co.uk' IE_DESC = 'BBC iPlayer' - _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:programmes|iplayer/episode)/(?P[\da-z]{8})' + _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:(?:(?:programmes|iplayer/(?:episode|playlist))/)|music/clips[/#])(?P[\da-z]{8})' _TESTS = [ { @@ -18,8 +18,8 @@ class BBCCoUkIE(SubtitlesInfoExtractor): 'info_dict': { 'id': 'b039d07m', 'ext': 'flv', - 'title': 'Kaleidoscope: Leonard Cohen', - 'description': 'md5:db4755d7a665ae72343779f7dacb402c', + 'title': 'Kaleidoscope, Leonard Cohen', + 'description': 'The Canadian poet and songwriter reflects on his musical career.', 'duration': 1740, }, 'params': { @@ -84,6 +84,40 @@ class BBCCoUkIE(SubtitlesInfoExtractor): # rtmp download 'skip_download': True, } + }, { + 'url': 'http://www.bbc.co.uk/music/clips/p02frcc3', + 'note': 'Audio', + 'info_dict': { + 'id': 'p02frcch', + 'ext': 'flv', + 'title': 'Pete Tong, Past, Present and Future Special, Madeon - After Hours mix', + 'description': 'French house superstar Madeon takes us out of the club and onto the after party.', + 'duration': 3507, + }, + 'params': { + # rtmp download + 'skip_download': True, + } + }, { + 'url': 'http://www.bbc.co.uk/music/clips/p025c0zz', + 'note': 'Video', + 'info_dict': { + 'id': 'p025c103', + 'ext': 'flv', + 'title': 'Reading and Leeds Festival, 2014, Rae Morris - Closer (Live on BBC Three)', + 'description': 'Rae Morris performs Closer for BBC Three at Reading 2014', + 'duration': 226, + }, + 'params': { + # rtmp download + 'skip_download': True, + } + }, { + 'url': 'http://www.bbc.co.uk/iplayer/playlist/p01dvks4', + 'only_matching': True, + }, { + 'url': 'http://www.bbc.co.uk/music/clips#p02frcc3', + 'only_matching': True, } ] @@ -241,8 +275,8 @@ class BBCCoUkIE(SubtitlesInfoExtractor): # fallback to legacy playlist playlist = self._download_xml( - 'http://www.bbc.co.uk/iplayer/playlist/%s' % playlist_id, - playlist_id, 'Downloading legacy playlist XML') + 'http://www.bbc.co.uk/iplayer/playlist/%s' % playlist_id, + playlist_id, 'Downloading legacy playlist XML') no_items = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}noItems') if no_items is not None: diff --git a/youtube_dl/extractor/bet.py b/youtube_dl/extractor/bet.py index 003e50002..d2abd4d77 100644 --- a/youtube_dl/extractor/bet.py +++ b/youtube_dl/extractor/bet.py @@ -16,7 +16,7 @@ class BetIE(InfoExtractor): { 'url': 'http://www.bet.com/news/politics/2014/12/08/in-bet-exclusive-obama-talks-race-and-racism.html', 'info_dict': { - 'id': '417cd61c-c793-4e8e-b006-e445ecc45add', + 'id': '740ab250-bb94-4a8a-8787-fe0de7c74471', 'display_id': 'in-bet-exclusive-obama-talks-race-and-racism', 'ext': 'flv', 'title': 'BET News Presents: A Conversation With President Obama', @@ -35,7 +35,7 @@ class BetIE(InfoExtractor): { 'url': 'http://www.bet.com/video/news/national/2014/justice-for-ferguson-a-community-reacts.html', 'info_dict': { - 'id': '4160e53b-ad41-43b1-980f-8d85f63121f4', + 'id': 'bcd1b1df-673a-42cf-8d01-b282db608f2d', 'display_id': 'justice-for-ferguson-a-community-reacts', 'ext': 'flv', 'title': 'Justice for Ferguson: A Community Reacts', @@ -55,7 +55,6 @@ class BetIE(InfoExtractor): def _real_extract(self, url): display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) media_url = compat_urllib_parse.unquote(self._search_regex( diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py index 241b904a9..75d744852 100644 --- a/youtube_dl/extractor/bilibili.py +++ b/youtube_dl/extractor/bilibili.py @@ -4,9 +4,7 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import compat_parse_qs from ..utils import ( - ExtractorError, int_or_none, unified_strdate, ) @@ -54,45 +52,38 @@ class BiliBiliIE(InfoExtractor): thumbnail = self._html_search_meta( 'thumbnailUrl', video_code, 'thumbnail', fatal=False) - player_params = compat_parse_qs(self._html_search_regex( - r'