From: Philipp Hagemeister Date: Thu, 8 Jan 2015 15:37:49 +0000 (+0100) Subject: Merge pull request #4647 from aajanki/hds_metadata X-Git-Url: http://git.bitcoin.ninja/index.cgi?p=youtube-dl;a=commitdiff_plain;h=ce08a86462984ff82368c1acd7285e9625855191;hp=f14f2a6d7909eae5cf0d6f447f8e8a3eb53752d4 Merge pull request #4647 from aajanki/hds_metadata [downloader/f4m] Improved metadata handling --- diff --git a/AUTHORS b/AUTHORS index a63c97ae0..8f2010803 100644 --- a/AUTHORS +++ b/AUTHORS @@ -100,3 +100,4 @@ Cédric Luthi Thijs Vermeir Joel Leclerc Christopher Krooss +Ondřej Caletka diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 0ff7b395a..7917abfc6 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -44,7 +44,7 @@ In particular, every site support request issue should only pertain to services ### Is anyone going to need the feature? -Only post features that you (or an incapicated friend you can personally talk to) require. Do not post features because they seem like a good idea. If they are really useful, they will be requested by someone who requires them. +Only post features that you (or an incapacitated friend you can personally talk to) require. Do not post features because they seem like a good idea. If they are really useful, they will be requested by someone who requires them. ### Is your question about youtube-dl? diff --git a/Makefile b/Makefile index e53a367ef..578079879 100644 --- a/Makefile +++ b/Makefile @@ -63,7 +63,7 @@ youtube-dl: youtube_dl/*.py youtube_dl/*/*.py chmod a+x youtube-dl README.md: youtube_dl/*.py youtube_dl/*/*.py - COLUMNS=80 python -m youtube_dl --help | python devscripts/make_readme.py + COLUMNS=80 python youtube_dl/__main__.py --help | python devscripts/make_readme.py CONTRIBUTING.md: README.md python devscripts/make_contributing.py README.md CONTRIBUTING.md diff --git a/README.md b/README.md index 915bcd0cd..24b4d7970 100644 --- a/README.md +++ b/README.md @@ -248,14 +248,15 @@ which means you can modify it, redistribute it or use it however you like. ## Video Format Options: -f, --format FORMAT video format code, specify the order of - preference using slashes: -f 22/17/18 . -f - mp4 , -f m4a and -f flv are also - supported. You can also use the special - names "best", "bestvideo", "bestaudio", - "worst", "worstvideo" and "worstaudio". By - default, youtube-dl will pick the best - quality. Use commas to download multiple - audio formats, such as -f + preference using slashes, as in -f 22/17/18 + . Instead of format codes, you can select + by extension for the extensions aac, m4a, + mp3, mp4, ogg, wav, webm. You can also use + the special names "best", "bestvideo", + "bestaudio", "worst". By default, youtube- + dl will pick the best quality. Use commas + to download multiple audio formats, such as + -f 136/137/mp4/bestvideo,140/m4a/bestaudio. You can merge the video and audio of two formats into a single file using -f \youtube-dl.conf`. +You can configure youtube-dl by placing default arguments (such as `--extract-audio --no-mtime` to always extract the audio and not copy the mtime) into `/etc/youtube-dl.conf` and/or `~/.config/youtube-dl/config`. On Windows, the configuration file locations are `%APPDATA%\youtube-dl\config.txt` and `C:\Users\\youtube-dl.conf`. # OUTPUT TEMPLATE diff --git a/devscripts/gh-pages/update-sites.py b/devscripts/gh-pages/update-sites.py index f0f0481c7..d3ef5f0b5 100755 --- a/devscripts/gh-pages/update-sites.py +++ b/devscripts/gh-pages/update-sites.py @@ -16,7 +16,7 @@ def main(): template = tmplf.read() ie_htmls = [] - for ie in sorted(youtube_dl.gen_extractors(), key=lambda i: i.IE_NAME.lower()): + for ie in youtube_dl.list_extractors(age_limit=None): ie_html = '{}'.format(ie.IE_NAME) ie_desc = getattr(ie, 'IE_DESC', None) if ie_desc is False: diff --git a/devscripts/make_supportedsites.py b/devscripts/make_supportedsites.py index 140010644..3df4385a6 100644 --- a/devscripts/make_supportedsites.py +++ b/devscripts/make_supportedsites.py @@ -23,12 +23,12 @@ def main(): def gen_ies_md(ies): for ie in ies: - ie_md = '**{}**'.format(ie.IE_NAME) + ie_md = '**{0}**'.format(ie.IE_NAME) ie_desc = getattr(ie, 'IE_DESC', None) if ie_desc is False: continue if ie_desc is not None: - ie_md += ': {}'.format(ie.IE_DESC) + ie_md += ': {0}'.format(ie.IE_DESC) if not ie.working(): ie_md += ' (Currently broken)' yield ie_md diff --git a/test/helper.py b/test/helper.py index 96d58b7c1..77225e4f7 100644 --- a/test/helper.py +++ b/test/helper.py @@ -82,18 +82,8 @@ class FakeYDL(YoutubeDL): def gettestcases(include_onlymatching=False): for ie in youtube_dl.extractor.gen_extractors(): - t = getattr(ie, '_TEST', None) - if t: - assert not hasattr(ie, '_TESTS'), \ - '%s has _TEST and _TESTS' % type(ie).__name__ - tests = [t] - else: - tests = getattr(ie, '_TESTS', []) - for t in tests: - if not include_onlymatching and t.get('only_matching', False): - continue - t['name'] = type(ie).__name__[:-len('IE')] - yield t + for tc in ie.get_testcases(include_onlymatching): + yield tc md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest() diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py index 13c18ed95..be8d12997 100644 --- a/test/test_InfoExtractor.py +++ b/test/test_InfoExtractor.py @@ -40,5 +40,23 @@ class TestInfoExtractor(unittest.TestCase): self.assertEqual(ie._og_search_description(html), 'Some video\'s description ') self.assertEqual(ie._og_search_thumbnail(html), 'http://domain.com/pic.jpg?key1=val1&key2=val2') + def test_html_search_meta(self): + ie = self.ie + html = ''' + + + + + + + ''' + + self.assertEqual(ie._html_search_meta('a', html), '1') + self.assertEqual(ie._html_search_meta('b', html), '2') + self.assertEqual(ie._html_search_meta('c', html), '3') + self.assertEqual(ie._html_search_meta('d', html), '4') + self.assertEqual(ie._html_search_meta('e', html), '5') + self.assertEqual(ie._html_search_meta('f', html), '6') + if __name__ == '__main__': unittest.main() diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index 730f7ec26..85d87f2c3 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -8,6 +8,8 @@ import sys import unittest sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +import copy + from test.helper import FakeYDL, assertRegexpMatches from youtube_dl import YoutubeDL from youtube_dl.extractor import YoutubeIE @@ -192,6 +194,37 @@ class TestFormatSelection(unittest.TestCase): downloaded = ydl.downloaded_info_dicts[0] self.assertEqual(downloaded['format_id'], 'vid-high') + def test_format_selection_audio_exts(self): + formats = [ + {'format_id': 'mp3-64', 'ext': 'mp3', 'abr': 64, 'url': 'http://_', 'vcodec': 'none'}, + {'format_id': 'ogg-64', 'ext': 'ogg', 'abr': 64, 'url': 'http://_', 'vcodec': 'none'}, + {'format_id': 'aac-64', 'ext': 'aac', 'abr': 64, 'url': 'http://_', 'vcodec': 'none'}, + {'format_id': 'mp3-32', 'ext': 'mp3', 'abr': 32, 'url': 'http://_', 'vcodec': 'none'}, + {'format_id': 'aac-32', 'ext': 'aac', 'abr': 32, 'url': 'http://_', 'vcodec': 'none'}, + ] + + info_dict = _make_result(formats) + ydl = YDL({'format': 'best'}) + ie = YoutubeIE(ydl) + ie._sort_formats(info_dict['formats']) + ydl.process_ie_result(copy.deepcopy(info_dict)) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], 'aac-64') + + ydl = YDL({'format': 'mp3'}) + ie = YoutubeIE(ydl) + ie._sort_formats(info_dict['formats']) + ydl.process_ie_result(copy.deepcopy(info_dict)) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], 'mp3-64') + + ydl = YDL({'prefer_free_formats': True}) + ie = YoutubeIE(ydl) + ie._sort_formats(info_dict['formats']) + ydl.process_ie_result(copy.deepcopy(info_dict)) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], 'ogg-64') + def test_format_selection_video(self): formats = [ {'format_id': 'dash-video-low', 'ext': 'mp4', 'preference': 1, 'acodec': 'none', 'url': '_'}, diff --git a/test/test_subtitles.py b/test/test_subtitles.py index d34565191..6336dd317 100644 --- a/test/test_subtitles.py +++ b/test/test_subtitles.py @@ -17,6 +17,7 @@ from youtube_dl.extractor import ( TEDIE, VimeoIE, WallaIE, + CeskaTelevizeIE, ) @@ -317,5 +318,32 @@ class TestWallaSubtitles(BaseTestSubtitles): self.assertEqual(len(subtitles), 0) +class TestCeskaTelevizeSubtitles(BaseTestSubtitles): + url = 'http://www.ceskatelevize.cz/ivysilani/10600540290-u6-uzasny-svet-techniky' + IE = CeskaTelevizeIE + + def test_list_subtitles(self): + self.DL.expect_warning('Automatic Captions not supported by this server') + self.DL.params['listsubtitles'] = True + info_dict = self.getInfoDict() + self.assertEqual(info_dict, None) + + def test_allsubtitles(self): + self.DL.expect_warning('Automatic Captions not supported by this server') + self.DL.params['writesubtitles'] = True + self.DL.params['allsubtitles'] = True + subtitles = self.getSubtitles() + self.assertEqual(set(subtitles.keys()), set(['cs'])) + self.assertEqual(md5(subtitles['cs']), '9bf52d9549533c32c427e264bf0847d4') + + def test_nosubtitles(self): + self.DL.expect_warning('video doesn\'t have subtitles') + self.url = 'http://www.ceskatelevize.cz/ivysilani/ivysilani/10441294653-hyde-park-civilizace/214411058091220' + self.DL.params['writesubtitles'] = True + self.DL.params['allsubtitles'] = True + subtitles = self.getSubtitles() + self.assertEqual(len(subtitles), 0) + + if __name__ == '__main__': unittest.main() diff --git a/test/test_utils.py b/test/test_utils.py index dd49a6d17..16e1a1ddf 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -16,6 +16,7 @@ import json import xml.etree.ElementTree from youtube_dl.utils import ( + age_restricted, args_to_str, clean_html, DateRange, @@ -402,5 +403,12 @@ Trying to open render node... Success at /dev/dri/renderD128. ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4') + def test_age_restricted(self): + self.assertFalse(age_restricted(None, 10)) # unrestricted content + self.assertFalse(age_restricted(1, None)) # unrestricted policy + self.assertFalse(age_restricted(8, 10)) + self.assertTrue(age_restricted(18, 14)) + self.assertFalse(age_restricted(18, 18)) + if __name__ == '__main__': unittest.main() diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 806e7b239..24d6c2de7 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -63,6 +63,7 @@ from .utils import ( YoutubeDLHandler, prepend_extension, args_to_str, + age_restricted, ) from .cache import Cache from .extractor import get_info_extractor, gen_extractors @@ -550,13 +551,8 @@ class YoutubeDL(object): max_views = self.params.get('max_views') if max_views is not None and view_count > max_views: return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views) - age_limit = self.params.get('age_limit') - if age_limit is not None: - actual_age_limit = info_dict.get('age_limit') - if actual_age_limit is None: - actual_age_limit = 0 - if age_limit < actual_age_limit: - return 'Skipping "' + title + '" because it is age restricted' + if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')): + return 'Skipping "%s" because it is age restricted' % title if self.in_download_archive(info_dict): return '%s has already been recorded in archive' % video_title return None @@ -790,7 +786,7 @@ class YoutubeDL(object): if video_formats: return video_formats[0] else: - extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a'] + extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav'] if format_spec in extensions: filter_f = lambda f: f['ext'] == format_spec else: diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 56f560d26..4aa7fba6a 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -38,7 +38,7 @@ from .update import update_self from .downloader import ( FileDownloader, ) -from .extractor import gen_extractors +from .extractor import gen_extractors, list_extractors from .YoutubeDL import YoutubeDL @@ -95,17 +95,15 @@ def _real_main(argv=None): _enc = preferredencoding() all_urls = [url.decode(_enc, 'ignore') if isinstance(url, bytes) else url for url in all_urls] - extractors = gen_extractors() - if opts.list_extractors: - for ie in sorted(extractors, key=lambda ie: ie.IE_NAME.lower()): + for ie in list_extractors(opts.age_limit): compat_print(ie.IE_NAME + (' (CURRENTLY BROKEN)' if not ie._WORKING else '')) matchedUrls = [url for url in all_urls if ie.suitable(url)] for mu in matchedUrls: compat_print(' ' + mu) sys.exit(0) if opts.list_extractor_descriptions: - for ie in sorted(extractors, key=lambda ie: ie.IE_NAME.lower()): + for ie in list_extractors(opts.age_limit): if not ie._WORKING: continue desc = getattr(ie, 'IE_DESC', ie.IE_NAME) @@ -365,3 +363,5 @@ def main(argv=None): sys.exit('ERROR: fixed output name but more than one file to download') except KeyboardInterrupt: sys.exit('\nERROR: Interrupted by user') + +__all__ = ['main', 'YoutubeDL', 'gen_extractors', 'list_extractors'] diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 8e47bd60d..8dacc2c54 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -159,6 +159,7 @@ from .gametrailers import GametrailersIE from .gdcvault import GDCVaultIE from .generic import GenericIE from .giantbomb import GiantBombIE +from .giga import GigaIE from .glide import GlideIE from .globo import GloboIE from .godtube import GodTubeIE @@ -574,6 +575,17 @@ def gen_extractors(): return [klass() for klass in _ALL_CLASSES] +def list_extractors(age_limit): + """ + Return a list of extractors that are suitable for the given age, + sorted by extractor ID. + """ + + return sorted( + filter(lambda ie: ie.is_suitable(age_limit), gen_extractors()), + key=lambda ie: ie.IE_NAME.lower()) + + def get_info_extractor(ie_name): """Returns the info extractor class with the given ie_name""" return globals()[ie_name + 'IE'] diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py index 241b904a9..75d744852 100644 --- a/youtube_dl/extractor/bilibili.py +++ b/youtube_dl/extractor/bilibili.py @@ -4,9 +4,7 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import compat_parse_qs from ..utils import ( - ExtractorError, int_or_none, unified_strdate, ) @@ -54,45 +52,38 @@ class BiliBiliIE(InfoExtractor): thumbnail = self._html_search_meta( 'thumbnailUrl', video_code, 'thumbnail', fatal=False) - player_params = compat_parse_qs(self._html_search_regex( - r'