From: Sergey M Date: Thu, 21 Apr 2016 15:53:16 +0000 (+0700) Subject: Merge pull request #9110 from remitamine/parse_duration X-Git-Url: http://git.bitcoin.ninja/index.cgi?p=youtube-dl;a=commitdiff_plain;h=b6c0d4f4315a282257adc6ab980b4cf4bfb3d418;hp=acaff49575f9f0c2ea893c4dbbbf57ba8baf656a Merge pull request #9110 from remitamine/parse_duration [utils] imporove parse_duration to handle more formats --- diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index bf9494646..35f8e6863 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.04.06*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.04.06** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.04.19*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.04.19** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.04.06 +[debug] youtube-dl version 2016.04.19 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.gitignore b/.gitignore index 26dbde73d..72c10425d 100644 --- a/.gitignore +++ b/.gitignore @@ -13,6 +13,7 @@ README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.fish +youtube_dl/extractor/lazy_extractors.py youtube-dl youtube-dl.exe youtube-dl.tar.gz diff --git a/AUTHORS b/AUTHORS index ea8d39978..07cade723 100644 --- a/AUTHORS +++ b/AUTHORS @@ -167,3 +167,4 @@ Kacper Michajłow José Joaquín Atria ViÅ¥as Strádal Kagami Hiiragi +Philip Huppert diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 0df6193fb..c83b8655a 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -140,14 +140,14 @@ After you have ensured this site is distributing it's content legally, you can f # TODO more properties (see youtube_dl/extractor/common.py) } ``` -5. Add an import in [`youtube_dl/extractor/__init__.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/__init__.py). +5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/extractors.py). 6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. 7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/58525c94d547be1c8167d16c298bdd75506db328/youtube_dl/extractor/common.py#L68-L226). Add tests and code for as many as you want. 8. Keep in mind that the only mandatory fields in info dict for successful extraction process are `id`, `title` and either `url` or `formats`, i.e. these are the critical data the extraction does not make any sense without. This means that [any field](https://github.com/rg3/youtube-dl/blob/58525c94d547be1c8167d16c298bdd75506db328/youtube_dl/extractor/common.py#L138-L226) apart from aforementioned mandatory ones should be treated **as optional** and extraction should be **tolerate** to situations when sources for these fields can potentially be unavailable (even if they always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields. For example, if you have some intermediate dict `meta` that is a source of metadata and it has a key `summary` that you want to extract and put into resulting info dict as `description`, you should be ready that this key may be missing from the `meta` dict, i.e. you should extract it as `meta.get('summary')` and not `meta['summary']`. Similarly, you should pass `fatal=False` when extracting data from a webpage with `_search_regex/_html_search_regex`. 9. Check the code with [flake8](https://pypi.python.org/pypi/flake8). 10. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this: - $ git add youtube_dl/extractor/__init__.py + $ git add youtube_dl/extractor/extractors.py $ git add youtube_dl/extractor/yourextractor.py $ git commit -m '[yourextractor] Add new extractor' $ git push origin yourextractor diff --git a/Makefile b/Makefile index ba7f7ed36..06cffcb71 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites clean: - rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part *.info.json *.mp4 *.flv *.mp3 *.avi CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe + rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish youtube_dl/extractor/lazy_extractors.py *.dump *.part *.info.json *.mp4 *.flv *.mp3 *.avi CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe find . -name "*.pyc" -delete find . -name "*.class" -delete @@ -88,6 +88,12 @@ youtube-dl.fish: youtube_dl/*.py youtube_dl/*/*.py devscripts/fish-completion.in fish-completion: youtube-dl.fish +lazy-extractors: youtube_dl/extractor/lazy_extractors.py + +_EXTRACTOR_FILES != find youtube_dl/extractor -iname '*.py' -and -not -iname 'lazy_extractors.py' +youtube_dl/extractor/lazy_extractors.py: devscripts/make_lazy_extractors.py devscripts/lazy_load_template.py $(_EXTRACTOR_FILES) + $(PYTHON) devscripts/make_lazy_extractors.py $@ + youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish @tar -czf youtube-dl.tar.gz --transform "s|^|youtube-dl/|" --owner 0 --group 0 \ --exclude '*.DS_Store' \ diff --git a/README.md b/README.md index e972bf69f..cd18edd87 100644 --- a/README.md +++ b/README.md @@ -889,14 +889,14 @@ After you have ensured this site is distributing it's content legally, you can f # TODO more properties (see youtube_dl/extractor/common.py) } ``` -5. Add an import in [`youtube_dl/extractor/__init__.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/__init__.py). +5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/extractors.py). 6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. 7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/58525c94d547be1c8167d16c298bdd75506db328/youtube_dl/extractor/common.py#L68-L226). Add tests and code for as many as you want. 8. Keep in mind that the only mandatory fields in info dict for successful extraction process are `id`, `title` and either `url` or `formats`, i.e. these are the critical data the extraction does not make any sense without. This means that [any field](https://github.com/rg3/youtube-dl/blob/58525c94d547be1c8167d16c298bdd75506db328/youtube_dl/extractor/common.py#L138-L226) apart from aforementioned mandatory ones should be treated **as optional** and extraction should be **tolerate** to situations when sources for these fields can potentially be unavailable (even if they always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields. For example, if you have some intermediate dict `meta` that is a source of metadata and it has a key `summary` that you want to extract and put into resulting info dict as `description`, you should be ready that this key may be missing from the `meta` dict, i.e. you should extract it as `meta.get('summary')` and not `meta['summary']`. Similarly, you should pass `fatal=False` when extracting data from a webpage with `_search_regex/_html_search_regex`. 9. Check the code with [flake8](https://pypi.python.org/pypi/flake8). 10. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this: - $ git add youtube_dl/extractor/__init__.py + $ git add youtube_dl/extractor/extractors.py $ git add youtube_dl/extractor/yourextractor.py $ git commit -m '[yourextractor] Add new extractor' $ git push origin yourextractor diff --git a/devscripts/lazy_load_template.py b/devscripts/lazy_load_template.py new file mode 100644 index 000000000..2e6e6641b --- /dev/null +++ b/devscripts/lazy_load_template.py @@ -0,0 +1,19 @@ +# encoding: utf-8 +from __future__ import unicode_literals + +import re + + +class LazyLoadExtractor(object): + _module = None + + @classmethod + def ie_key(cls): + return cls.__name__[:-2] + + def __new__(cls, *args, **kwargs): + mod = __import__(cls._module, fromlist=(cls.__name__,)) + real_cls = getattr(mod, cls.__name__) + instance = real_cls.__new__(real_cls) + instance.__init__(*args, **kwargs) + return instance diff --git a/devscripts/make_lazy_extractors.py b/devscripts/make_lazy_extractors.py new file mode 100644 index 000000000..b5a8b9190 --- /dev/null +++ b/devscripts/make_lazy_extractors.py @@ -0,0 +1,63 @@ +from __future__ import unicode_literals, print_function + +from inspect import getsource +import os +from os.path import dirname as dirn +import sys + +print('WARNING: Lazy loading extractors is an experimental feature that may not always work', file=sys.stderr) + +sys.path.insert(0, dirn(dirn((os.path.abspath(__file__))))) + +lazy_extractors_filename = sys.argv[1] +if os.path.exists(lazy_extractors_filename): + os.remove(lazy_extractors_filename) + +from youtube_dl.extractor import _ALL_CLASSES +from youtube_dl.extractor.common import InfoExtractor + +with open('devscripts/lazy_load_template.py', 'rt') as f: + module_template = f.read() + +module_contents = [module_template + '\n' + getsource(InfoExtractor.suitable)] + +ie_template = ''' +class {name}(LazyLoadExtractor): + _VALID_URL = {valid_url!r} + _module = '{module}' +''' + +make_valid_template = ''' + @classmethod + def _make_valid_url(cls): + return {valid_url!r} +''' + + +def build_lazy_ie(ie, name): + valid_url = getattr(ie, '_VALID_URL', None) + s = ie_template.format( + name=name, + valid_url=valid_url, + module=ie.__module__) + if ie.suitable.__func__ is not InfoExtractor.suitable.__func__: + s += '\n' + getsource(ie.suitable) + if hasattr(ie, '_make_valid_url'): + # search extractors + s += make_valid_template.format(valid_url=ie._make_valid_url()) + return s + +names = [] +for ie in list(sorted(_ALL_CLASSES[:-1], key=lambda cls: cls.ie_key())) + _ALL_CLASSES[-1:]: + name = ie.ie_key() + 'IE' + src = build_lazy_ie(ie, name) + module_contents.append(src) + names.append(name) + +module_contents.append( + '_ALL_CLASSES = [{0}]'.format(', '.join(names))) + +module_src = '\n'.join(module_contents) + '\n' + +with open(lazy_extractors_filename, 'wt') as f: + f.write(module_src) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index d6ee8476b..e12a7d182 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -50,6 +50,7 @@ - **arte.tv:ddc** - **arte.tv:embed** - **arte.tv:future** + - **arte.tv:info** - **arte.tv:magazine** - **AtresPlayer** - **ATTTechChannel** @@ -115,6 +116,7 @@ - **Cinemassacre** - **Clipfish** - **cliphunter** + - **ClipRs** - **Clipsyndicate** - **cloudtime**: CloudTime - **Cloudy** @@ -286,7 +288,6 @@ - **ivi:compilation**: ivi.ru compilations - **ivideon**: Ivideon TV - **Izlesene** - - **JadoreCettePub** - **JeuxVideo** - **Jove** - **jpopsuki.tv** @@ -350,13 +351,15 @@ - **miomio.tv** - **MiTele**: mitele.es - **mixcloud** + - **mixcloud:playlist** + - **mixcloud:stream** + - **mixcloud:user** - **MLB** - **Mnet** - **MoeVideo**: LetitBit video services: moevideo.net, playreplay.net and videochart.net - **Mofosex** - **Mojvideo** - **Moniker**: allmyvideos.net and vidspot.net - - **mooshare**: Mooshare.biz - **Morningstar**: morningstar.com - **Motherless** - **Motorsport**: motorsport.com @@ -393,7 +396,6 @@ - **ndr:embed:base** - **NDTV** - **NerdCubedFeed** - - **Nerdist** - **netease:album**: 网易云音乐 - 专辑 - **netease:djradio**: 网易云音乐 - 电台 - **netease:mv**: 网易云音乐 - MV @@ -484,6 +486,7 @@ - **Pornotube** - **PornoVoisines** - **PornoXO** + - **PressTV** - **PrimeShareTV** - **PromptFile** - **prosiebensat1**: ProSiebenSat.1 Digital @@ -608,6 +611,7 @@ - **Tagesschau** - **Tapely** - **Tass** + - **TDSLifeway** - **teachertube**: teachertube.com videos - **teachertube:user:collection**: teachertube.com user and collection videos - **TeachingChannel** @@ -624,7 +628,6 @@ - **TeleTask** - **TF1** - **TheIntercept** - - **TheOnion** - **ThePlatform** - **ThePlatformFeed** - **TheScene** @@ -683,7 +686,6 @@ - **twitter** - **twitter:amplify** - **twitter:card** - - **Ubu** - **udemy** - **udemy:course** - **UDNEmbed**: 聯合影音 @@ -753,7 +755,6 @@ - **Walla** - **WashingtonPost** - **wat.tv** - - **WayOfTheMaster** - **WDR** - **wdr:mobile** - **WDRMaus**: Sendung mit der Maus diff --git a/setup.cfg b/setup.cfg index 5760112d4..2dc06ffe4 100644 --- a/setup.cfg +++ b/setup.cfg @@ -2,5 +2,5 @@ universal = True [flake8] -exclude = youtube_dl/extractor/__init__.py,devscripts/buildserver.py,devscripts/make_issue_template.py,setup.py,build,.git +exclude = youtube_dl/extractor/__init__.py,devscripts/buildserver.py,devscripts/lazy_load_template.py,devscripts/make_issue_template.py,setup.py,build,.git ignore = E402,E501,E731 diff --git a/setup.py b/setup.py index bfe931f5b..9444d403d 100644 --- a/setup.py +++ b/setup.py @@ -8,11 +8,12 @@ import warnings import sys try: - from setuptools import setup + from setuptools import setup, Command setuptools_available = True except ImportError: - from distutils.core import setup + from distutils.core import setup, Command setuptools_available = False +from distutils.spawn import spawn try: # This will create an exe that needs Microsoft Visual C++ 2008 @@ -70,6 +71,22 @@ else: else: params['scripts'] = ['bin/youtube-dl'] +class build_lazy_extractors(Command): + description = "Build the extractor lazy loading module" + user_options = [] + + def initialize_options(self): + pass + + def finalize_options(self): + pass + + def run(self): + spawn( + [sys.executable, 'devscripts/make_lazy_extractors.py', 'youtube_dl/extractor/lazy_extractors.py'], + dry_run=self.dry_run, + ) + # Get the version from youtube_dl/version.py without importing the package exec(compile(open('youtube_dl/version.py').read(), 'youtube_dl/version.py', 'exec')) @@ -107,5 +124,6 @@ setup( "Programming Language :: Python :: 3.4", ], + cmdclass={'build_lazy_extractors': build_lazy_extractors}, **params ) diff --git a/test/helper.py b/test/helper.py index f2d878212..b8e22c5cb 100644 --- a/test/helper.py +++ b/test/helper.py @@ -143,6 +143,9 @@ def expect_value(self, got, expected, field): expect_value(self, item_got, item_expected, field) else: if isinstance(expected, compat_str) and expected.startswith('md5:'): + self.assertTrue( + isinstance(got, compat_str), + 'Expected field %s to be a unicode object, but got value %r of type %r' % (field, got, type(got))) got = 'md5:' + md5(got) elif isinstance(expected, compat_str) and expected.startswith('mincount:'): self.assertTrue( diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py index 938466a80..6404ac89f 100644 --- a/test/test_InfoExtractor.py +++ b/test/test_InfoExtractor.py @@ -11,6 +11,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from test.helper import FakeYDL from youtube_dl.extractor.common import InfoExtractor from youtube_dl.extractor import YoutubeIE, get_info_extractor +from youtube_dl.utils import encode_data_uri, strip_jsonp, ExtractorError class TestIE(InfoExtractor): @@ -66,5 +67,14 @@ class TestInfoExtractor(unittest.TestCase): self.assertEqual(ie._html_search_meta('e', html), '5') self.assertEqual(ie._html_search_meta('f', html), '6') + def test_download_json(self): + uri = encode_data_uri(b'{"foo": "blah"}', 'application/json') + self.assertEqual(self.ie._download_json(uri, None), {'foo': 'blah'}) + uri = encode_data_uri(b'callback({"foo": "blah"})', 'application/javascript') + self.assertEqual(self.ie._download_json(uri, None, transform_source=strip_jsonp), {'foo': 'blah'}) + uri = encode_data_uri(b'{"foo": invalid}', 'application/json') + self.assertRaises(ExtractorError, self.ie._download_json, uri, None) + self.assertEqual(self.ie._download_json(uri, None, fatal=False), None) + if __name__ == '__main__': unittest.main() diff --git a/test/test_utils.py b/test/test_utils.py index e0323a5c6..e16a6761b 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -20,6 +20,7 @@ from youtube_dl.utils import ( args_to_str, encode_base_n, clean_html, + date_from_str, DateRange, detect_exe_version, determine_ext, @@ -234,6 +235,13 @@ class TestUtil(unittest.TestCase): self.assertEqual(unescapeHTML('é'), 'é') self.assertEqual(unescapeHTML('�'), '�') + def test_date_from_str(self): + self.assertEqual(date_from_str('yesterday'), date_from_str('now-1day')) + self.assertEqual(date_from_str('now+7day'), date_from_str('now+1week')) + self.assertEqual(date_from_str('now+14day'), date_from_str('now+2week')) + self.assertEqual(date_from_str('now+365day'), date_from_str('now+1year')) + self.assertEqual(date_from_str('now+30day'), date_from_str('now+1month')) + def test_daterange(self): _20century = DateRange("19000101", "20000101") self.assertFalse("17890714" in _20century) diff --git a/test/test_youtube_lists.py b/test/test_youtube_lists.py index 47df0f348..af1c45421 100644 --- a/test/test_youtube_lists.py +++ b/test/test_youtube_lists.py @@ -44,7 +44,7 @@ class TestYoutubeLists(unittest.TestCase): ie = YoutubePlaylistIE(dl) result = ie.extract('https://www.youtube.com/watch?v=W01L70IGBgE&index=2&list=RDOQpdSVF_k_w') entries = result['entries'] - self.assertTrue(len(entries) >= 20) + self.assertTrue(len(entries) >= 50) original_video = entries[0] self.assertEqual(original_video['id'], 'OQpdSVF_k_w') diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index d7aa951ff..a89a71a25 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -82,7 +82,7 @@ from .utils import ( YoutubeDLHandler, ) from .cache import Cache -from .extractor import get_info_extractor, gen_extractors +from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER from .downloader import get_suitable_downloader from .downloader.rtmp import rtmpdump_version from .postprocessor import ( @@ -378,8 +378,9 @@ class YoutubeDL(object): def add_info_extractor(self, ie): """Add an InfoExtractor object to the end of the list.""" self._ies.append(ie) - self._ies_instances[ie.ie_key()] = ie - ie.set_downloader(self) + if not isinstance(ie, type): + self._ies_instances[ie.ie_key()] = ie + ie.set_downloader(self) def get_info_extractor(self, ie_key): """ @@ -397,7 +398,7 @@ class YoutubeDL(object): """ Add the InfoExtractors returned by gen_extractors to the end of the list """ - for ie in gen_extractors(): + for ie in gen_extractor_classes(): self.add_info_extractor(ie) def add_post_processor(self, pp): @@ -661,6 +662,7 @@ class YoutubeDL(object): if not ie.suitable(url): continue + ie = self.get_info_extractor(ie.ie_key()) if not ie.working(): self.report_warning('The program functionality for this site has been marked as broken, ' 'and will probably not work.') @@ -1240,7 +1242,10 @@ class YoutubeDL(object): self.list_thumbnails(info_dict) return - if thumbnails and 'thumbnail' not in info_dict: + thumbnail = info_dict.get('thumbnail') + if thumbnail: + info_dict['thumbnail'] = sanitize_url(thumbnail) + elif thumbnails: info_dict['thumbnail'] = thumbnails[-1]['url'] if 'display_id' not in info_dict and 'id' in info_dict: @@ -1954,6 +1959,8 @@ class YoutubeDL(object): write_string(encoding_str, encoding=None) self._write_string('[debug] youtube-dl version ' + __version__ + '\n') + if _LAZY_LOADER: + self._write_string('[debug] Lazy loading extractors enabled' + '\n') try: sp = subprocess.Popen( ['git', 'rev-parse', '--short', 'HEAD'], diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py index 30277dc20..8d642fc3e 100644 --- a/youtube_dl/downloader/external.py +++ b/youtube_dl/downloader/external.py @@ -225,7 +225,7 @@ class FFmpegFD(ExternalFD): args += ['-i', url, '-c', 'copy'] if protocol == 'm3u8': - if self.params.get('hls_use_mpegts', False): + if self.params.get('hls_use_mpegts', False) or tmpfilename == '-': args += ['-f', 'mpegts'] else: args += ['-f', 'mp4', '-bsf:a', 'aac_adtstoasc'] diff --git a/youtube_dl/downloader/rtsp.py b/youtube_dl/downloader/rtsp.py index 3eb29526c..939358b2a 100644 --- a/youtube_dl/downloader/rtsp.py +++ b/youtube_dl/downloader/rtsp.py @@ -27,6 +27,8 @@ class RtspFD(FileDownloader): self.report_error('MMS or RTSP download detected but neither "mplayer" nor "mpv" could be run. Please install any.') return False + self._debug_cmd(args) + retval = subprocess.call(args) if retval == 0: fsize = os.path.getsize(encodeFilename(tmpfilename)) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index c3121d83c..18d8dbcd6 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -1,1007 +1,33 @@ from __future__ import unicode_literals -from .abc import ABCIE -from .abc7news import Abc7NewsIE -from .academicearth import AcademicEarthCourseIE -from .acast import ( - ACastIE, - ACastChannelIE, -) -from .addanime import AddAnimeIE -from .adobetv import ( - AdobeTVIE, - AdobeTVShowIE, - AdobeTVChannelIE, - AdobeTVVideoIE, -) -from .adultswim import AdultSwimIE -from .aenetworks import AENetworksIE -from .aftonbladet import AftonbladetIE -from .airmozilla import AirMozillaIE -from .aljazeera import AlJazeeraIE -from .alphaporno import AlphaPornoIE -from .animeondemand import AnimeOnDemandIE -from .anitube import AnitubeIE -from .anysex import AnySexIE -from .aol import ( - AolIE, - AolFeaturesIE, -) -from .allocine import AllocineIE -from .aparat import AparatIE -from .appleconnect import AppleConnectIE -from .appletrailers import ( - AppleTrailersIE, - AppleTrailersSectionIE, -) -from .archiveorg import ArchiveOrgIE -from .ard import ( - ARDIE, - ARDMediathekIE, - SportschauIE, -) -from .arte import ( - ArteTvIE, - ArteTVPlus7IE, - ArteTVCreativeIE, - ArteTVConcertIE, - ArteTVFutureIE, - ArteTVCinemaIE, - ArteTVDDCIE, - ArteTVMagazineIE, - ArteTVEmbedIE, -) -from .atresplayer import AtresPlayerIE -from .atttechchannel import ATTTechChannelIE -from .audimedia import AudiMediaIE -from .audioboom import AudioBoomIE -from .audiomack import AudiomackIE, AudiomackAlbumIE -from .azubu import AzubuIE, AzubuLiveIE -from .baidu import BaiduVideoIE -from .bambuser import BambuserIE, BambuserChannelIE -from .bandcamp import BandcampIE, BandcampAlbumIE -from .bbc import ( - BBCCoUkIE, - BBCCoUkArticleIE, - BBCIE, -) -from .beeg import BeegIE -from .behindkink import BehindKinkIE -from .beatportpro import BeatportProIE -from .bet import BetIE -from .bigflix import BigflixIE -from .bild import BildIE -from .bilibili import BiliBiliIE -from .biobiochiletv import BioBioChileTVIE -from .bleacherreport import ( - BleacherReportIE, - BleacherReportCMSIE, -) -from .blinkx import BlinkxIE -from .bloomberg import BloombergIE -from .bokecc import BokeCCIE -from .bpb import BpbIE -from .br import BRIE -from .bravotv import BravoTVIE -from .breakcom import BreakIE -from .brightcove import ( - BrightcoveLegacyIE, - BrightcoveNewIE, -) -from .buzzfeed import BuzzFeedIE -from .byutv import BYUtvIE -from .c56 import C56IE -from .camdemy import ( - CamdemyIE, - CamdemyFolderIE -) -from .camwithher import CamWithHerIE -from .canalplus import CanalplusIE -from .canalc2 import Canalc2IE -from .canvas import CanvasIE -from .cbc import ( - CBCIE, - CBCPlayerIE, -) -from .cbs import CBSIE -from .cbsinteractive import CBSInteractiveIE -from .cbsnews import ( - CBSNewsIE, - CBSNewsLiveVideoIE, -) -from .cbssports import CBSSportsIE -from .ccc import CCCIE -from .cda import CDAIE -from .ceskatelevize import CeskaTelevizeIE -from .channel9 import Channel9IE -from .chaturbate import ChaturbateIE -from .chilloutzone import ChilloutzoneIE -from .chirbit import ( - ChirbitIE, - ChirbitProfileIE, -) -from .cinchcast import CinchcastIE -from .cinemassacre import CinemassacreIE -from .clipfish import ClipfishIE -from .cliphunter import CliphunterIE -from .clipsyndicate import ClipsyndicateIE -from .cloudy import CloudyIE -from .clubic import ClubicIE -from .clyp import ClypIE -from .cmt import CMTIE -from .cnbc import CNBCIE -from .cnn import ( - CNNIE, - CNNBlogsIE, - CNNArticleIE, -) -from .collegehumor import CollegeHumorIE -from .collegerama import CollegeRamaIE -from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE -from .comcarcoff import ComCarCoffIE -from .commonmistakes import CommonMistakesIE, UnicodeBOMIE -from .commonprotocols import RtmpIE -from .condenast import CondeNastIE -from .cracked import CrackedIE -from .crackle import CrackleIE -from .criterion import CriterionIE -from .crooksandliars import CrooksAndLiarsIE -from .crunchyroll import ( - CrunchyrollIE, - CrunchyrollShowPlaylistIE -) -from .cspan import CSpanIE -from .ctsnews import CtsNewsIE -from .cultureunplugged import CultureUnpluggedIE -from .cwtv import CWTVIE -from .dailymotion import ( - DailymotionIE, - DailymotionPlaylistIE, - DailymotionUserIE, - DailymotionCloudIE, -) -from .daum import ( - DaumIE, - DaumClipIE, - DaumPlaylistIE, - DaumUserIE, -) -from .dbtv import DBTVIE -from .dcn import ( - DCNIE, - DCNVideoIE, - DCNLiveIE, - DCNSeasonIE, -) -from .dctp import DctpTvIE -from .deezer import DeezerPlaylistIE -from .democracynow import DemocracynowIE -from .dfb import DFBIE -from .dhm import DHMIE -from .dotsub import DotsubIE -from .douyutv import DouyuTVIE -from .dplay import DPlayIE -from .dramafever import ( - DramaFeverIE, - DramaFeverSeriesIE, -) -from .dreisat import DreiSatIE -from .drbonanza import DRBonanzaIE -from .drtuber import DrTuberIE -from .drtv import DRTVIE -from .dvtv import DVTVIE -from .dump import DumpIE -from .dumpert import DumpertIE -from .defense import DefenseGouvFrIE -from .discovery import DiscoveryIE -from .dropbox import DropboxIE -from .dw import ( - DWIE, - DWArticleIE, -) -from .eagleplatform import EaglePlatformIE -from .ebaumsworld import EbaumsWorldIE -from .echomsk import EchoMskIE -from .ehow import EHowIE -from .eighttracks import EightTracksIE -from .einthusan import EinthusanIE -from .eitb import EitbIE -from .ellentv import ( - EllenTVIE, - EllenTVClipsIE, -) -from .elpais import ElPaisIE -from .embedly import EmbedlyIE -from .engadget import EngadgetIE -from .eporner import EpornerIE -from .eroprofile import EroProfileIE -from .escapist import EscapistIE -from .espn import ESPNIE -from .esri import EsriVideoIE -from .europa import EuropaIE -from .everyonesmixtape import EveryonesMixtapeIE -from .exfm import ExfmIE -from .expotv import ExpoTVIE -from .extremetube import ExtremeTubeIE -from .facebook import FacebookIE -from .faz import FazIE -from .fc2 import FC2IE -from .fczenit import FczenitIE -from .firstpost import FirstpostIE -from .firsttv import FirstTVIE -from .fivemin import FiveMinIE -from .fivetv import FiveTVIE -from .fktv import FKTVIE -from .flickr import FlickrIE -from .folketinget import FolketingetIE -from .footyroom import FootyRoomIE -from .fourtube import FourTubeIE -from .fox import FOXIE -from .foxgay import FoxgayIE -from .foxnews import FoxNewsIE -from .foxsports import FoxSportsIE -from .franceculture import ( - FranceCultureIE, - FranceCultureEmissionIE, -) -from .franceinter import FranceInterIE -from .francetv import ( - PluzzIE, - FranceTvInfoIE, - FranceTVIE, - GenerationQuoiIE, - CultureboxIE, -) -from .freesound import FreesoundIE -from .freespeech import FreespeechIE -from .freevideo import FreeVideoIE -from .funimation import FunimationIE -from .funnyordie import FunnyOrDieIE -from .gameinformer import GameInformerIE -from .gamekings import GamekingsIE -from .gameone import ( - GameOneIE, - GameOnePlaylistIE, -) -from .gamersyde import GamersydeIE -from .gamespot import GameSpotIE -from .gamestar import GameStarIE -from .gametrailers import GametrailersIE -from .gazeta import GazetaIE -from .gdcvault import GDCVaultIE -from .generic import GenericIE -from .gfycat import GfycatIE -from .giantbomb import GiantBombIE -from .giga import GigaIE -from .glide import GlideIE -from .globo import ( - GloboIE, - GloboArticleIE, -) -from .godtube import GodTubeIE -from .goldenmoustache import GoldenMoustacheIE -from .golem import GolemIE -from .googledrive import GoogleDriveIE -from .googleplus import GooglePlusIE -from .googlesearch import GoogleSearchIE -from .goshgay import GoshgayIE -from .gputechconf import GPUTechConfIE -from .groupon import GrouponIE -from .hark import HarkIE -from .hbo import HBOIE -from .hearthisat import HearThisAtIE -from .heise import HeiseIE -from .hellporno import HellPornoIE -from .helsinki import HelsinkiIE -from .hentaistigma import HentaiStigmaIE -from .historicfilms import HistoricFilmsIE -from .hitbox import HitboxIE, HitboxLiveIE -from .hornbunny import HornBunnyIE -from .hotnewhiphop import HotNewHipHopIE -from .hotstar import HotStarIE -from .howcast import HowcastIE -from .howstuffworks import HowStuffWorksIE -from .huffpost import HuffPostIE -from .hypem import HypemIE -from .iconosquare import IconosquareIE -from .ign import ( - IGNIE, - OneUPIE, - PCMagIE, -) -from .imdb import ( - ImdbIE, - ImdbListIE -) -from .imgur import ( - ImgurIE, - ImgurAlbumIE, -) -from .ina import InaIE -from .indavideo import ( - IndavideoIE, - IndavideoEmbedIE, -) -from .infoq import InfoQIE -from .instagram import InstagramIE, InstagramUserIE -from .internetvideoarchive import InternetVideoArchiveIE -from .iprima import IPrimaIE -from .iqiyi import IqiyiIE -from .ir90tv import Ir90TvIE -from .ivi import ( - IviIE, - IviCompilationIE -) -from .ivideon import IvideonIE -from .izlesene import IzleseneIE -from .jadorecettepub import JadoreCettePubIE -from .jeuxvideo import JeuxVideoIE -from .jove import JoveIE -from .jwplatform import JWPlatformIE -from .jpopsukitv import JpopsukiIE -from .kaltura import KalturaIE -from .kanalplay import KanalPlayIE -from .kankan import KankanIE -from .karaoketv import KaraoketvIE -from .karrierevideos import KarriereVideosIE -from .keezmovies import KeezMoviesIE -from .khanacademy import KhanAcademyIE -from .kickstarter import KickStarterIE -from .keek import KeekIE -from .konserthusetplay import KonserthusetPlayIE -from .kontrtube import KontrTubeIE -from .krasview import KrasViewIE -from .ku6 import Ku6IE -from .kusi import KUSIIE -from .kuwo import ( - KuwoIE, - KuwoAlbumIE, - KuwoChartIE, - KuwoSingerIE, - KuwoCategoryIE, - KuwoMvIE, -) -from .la7 import LA7IE -from .laola1tv import Laola1TvIE -from .lecture2go import Lecture2GoIE -from .lemonde import LemondeIE -from .leeco import ( - LeIE, - LePlaylistIE, - LetvCloudIE, -) -from .libsyn import LibsynIE -from .lifenews import ( - LifeNewsIE, - LifeEmbedIE, -) -from .limelight import ( - LimelightMediaIE, - LimelightChannelIE, - LimelightChannelListIE, -) -from .liveleak import LiveLeakIE -from .livestream import ( - LivestreamIE, - LivestreamOriginalIE, - LivestreamShortenerIE, -) -from .lnkgo import LnkGoIE -from .lovehomeporn import LoveHomePornIE -from .lrt import LRTIE -from .lynda import ( - LyndaIE, - LyndaCourseIE -) -from .m6 import M6IE -from .macgamestore import MacGameStoreIE -from .mailru import MailRuIE -from .makerschannel import MakersChannelIE -from .makertv import MakerTVIE -from .malemotion import MalemotionIE -from .matchtv import MatchTVIE -from .mdr import MDRIE -from .metacafe import MetacafeIE -from .metacritic import MetacriticIE -from .mgoon import MgoonIE -from .minhateca import MinhatecaIE -from .ministrygrid import MinistryGridIE -from .minoto import MinotoIE -from .miomio import MioMioIE -from .mit import TechTVMITIE, MITIE, OCWMITIE -from .mitele import MiTeleIE -from .mixcloud import MixcloudIE -from .mlb import MLBIE -from .mnet import MnetIE -from .mpora import MporaIE -from .moevideo import MoeVideoIE -from .mofosex import MofosexIE -from .mojvideo import MojvideoIE -from .moniker import MonikerIE -from .mooshare import MooshareIE -from .morningstar import MorningstarIE -from .motherless import MotherlessIE -from .motorsport import MotorsportIE -from .movieclips import MovieClipsIE -from .moviezine import MoviezineIE -from .mtv import ( - MTVIE, - MTVServicesEmbeddedIE, - MTVIggyIE, - MTVDEIE, -) -from .muenchentv import MuenchenTVIE -from .musicplayon import MusicPlayOnIE -from .muzu import MuzuTVIE -from .mwave import MwaveIE -from .myspace import MySpaceIE, MySpaceAlbumIE -from .myspass import MySpassIE -from .myvi import MyviIE -from .myvideo import MyVideoIE -from .myvidster import MyVidsterIE -from .nationalgeographic import ( - NationalGeographicIE, - NationalGeographicChannelIE, -) -from .naver import NaverIE -from .nba import NBAIE -from .nbc import ( - CSNNEIE, - NBCIE, - NBCNewsIE, - NBCSportsIE, - NBCSportsVPlayerIE, - MSNBCIE, -) -from .ndr import ( - NDRIE, - NJoyIE, - NDREmbedBaseIE, - NDREmbedIE, - NJoyEmbedIE, -) -from .ndtv import NDTVIE -from .netzkino import NetzkinoIE -from .nerdcubed import NerdCubedFeedIE -from .nerdist import NerdistIE -from .neteasemusic import ( - NetEaseMusicIE, - NetEaseMusicAlbumIE, - NetEaseMusicSingerIE, - NetEaseMusicListIE, - NetEaseMusicMvIE, - NetEaseMusicProgramIE, - NetEaseMusicDjRadioIE, -) -from .newgrounds import NewgroundsIE -from .newstube import NewstubeIE -from .nextmedia import ( - NextMediaIE, - NextMediaActionNewsIE, - AppleDailyIE, -) -from .nextmovie import NextMovieIE -from .nfb import NFBIE -from .nfl import NFLIE -from .nhl import ( - NHLIE, - NHLNewsIE, - NHLVideocenterIE, -) -from .nick import NickIE -from .niconico import NiconicoIE, NiconicoPlaylistIE -from .ninegag import NineGagIE -from .noco import NocoIE -from .normalboots import NormalbootsIE -from .nosvideo import NosVideoIE -from .nova import NovaIE -from .novamov import ( - AuroraVidIE, - CloudTimeIE, - NowVideoIE, - VideoWeedIE, - WholeCloudIE, -) -from .nowness import ( - NownessIE, - NownessPlaylistIE, - NownessSeriesIE, -) -from .nowtv import ( - NowTVIE, - NowTVListIE, -) -from .noz import NozIE -from .npo import ( - NPOIE, - NPOLiveIE, - NPORadioIE, - NPORadioFragmentIE, - SchoolTVIE, - VPROIE, - WNLIE -) -from .npr import NprIE -from .nrk import ( - NRKIE, - NRKPlaylistIE, - NRKSkoleIE, - NRKTVIE, -) -from .ntvde import NTVDeIE -from .ntvru import NTVRuIE -from .nytimes import ( - NYTimesIE, - NYTimesArticleIE, -) -from .nuvid import NuvidIE -from .odnoklassniki import OdnoklassnikiIE -from .oktoberfesttv import OktoberfestTVIE -from .onionstudios import OnionStudiosIE -from .ooyala import ( - OoyalaIE, - OoyalaExternalIE, -) -from .openload import OpenloadIE -from .ora import OraTVIE -from .orf import ( - ORFTVthekIE, - ORFOE1IE, - ORFFM4IE, - ORFIPTVIE, -) -from .pandoratv import PandoraTVIE -from .parliamentliveuk import ParliamentLiveUKIE -from .patreon import PatreonIE -from .pbs import PBSIE -from .periscope import PeriscopeIE -from .philharmoniedeparis import PhilharmonieDeParisIE -from .phoenix import PhoenixIE -from .photobucket import PhotobucketIE -from .pinkbike import PinkbikeIE -from .planetaplay import PlanetaPlayIE -from .pladform import PladformIE -from .played import PlayedIE -from .playfm import PlayFMIE -from .plays import PlaysTVIE -from .playtvak import PlaytvakIE -from .playvid import PlayvidIE -from .playwire import PlaywireIE -from .pluralsight import ( - PluralsightIE, - PluralsightCourseIE, -) -from .podomatic import PodomaticIE -from .porn91 import Porn91IE -from .pornhd import PornHdIE -from .pornhub import ( - PornHubIE, - PornHubPlaylistIE, - PornHubUserVideosIE, -) -from .pornotube import PornotubeIE -from .pornovoisines import PornoVoisinesIE -from .pornoxo import PornoXOIE -from .primesharetv import PrimeShareTVIE -from .promptfile import PromptFileIE -from .prosiebensat1 import ProSiebenSat1IE -from .puls4 import Puls4IE -from .pyvideo import PyvideoIE -from .qqmusic import ( - QQMusicIE, - QQMusicSingerIE, - QQMusicAlbumIE, - QQMusicToplistIE, - QQMusicPlaylistIE, -) -from .quickvid import QuickVidIE -from .r7 import R7IE -from .radiode import RadioDeIE -from .radiojavan import RadioJavanIE -from .radiobremen import RadioBremenIE -from .radiofrance import RadioFranceIE -from .rai import ( - RaiTVIE, - RaiIE, -) -from .rbmaradio import RBMARadioIE -from .rds import RDSIE -from .redtube import RedTubeIE -from .regiotv import RegioTVIE -from .restudy import RestudyIE -from .reverbnation import ReverbNationIE -from .revision3 import Revision3IE -from .rice import RICEIE -from .ringtv import RingTVIE -from .ro220 import Ro220IE -from .rottentomatoes import RottenTomatoesIE -from .roxwel import RoxwelIE -from .rtbf import RTBFIE -from .rte import RteIE, RteRadioIE -from .rtlnl import RtlNlIE -from .rtl2 import RTL2IE -from .rtp import RTPIE -from .rts import RTSIE -from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE -from .rtvnh import RTVNHIE -from .ruhd import RUHDIE -from .ruleporn import RulePornIE -from .rutube import ( - RutubeIE, - RutubeChannelIE, - RutubeEmbedIE, - RutubeMovieIE, - RutubePersonIE, -) -from .rutv import RUTVIE -from .ruutu import RuutuIE -from .sandia import SandiaIE -from .safari import ( - SafariIE, - SafariApiIE, - SafariCourseIE, -) -from .sapo import SapoIE -from .savefrom import SaveFromIE -from .sbs import SBSIE -from .scivee import SciVeeIE -from .screencast import ScreencastIE -from .screencastomatic import ScreencastOMaticIE -from .screenjunkies import ScreenJunkiesIE -from .screenwavemedia import ScreenwaveMediaIE, TeamFourIE -from .senateisvp import SenateISVPIE -from .servingsys import ServingSysIE -from .sexu import SexuIE -from .sexykarma import SexyKarmaIE -from .shahid import ShahidIE -from .shared import SharedIE -from .sharesix import ShareSixIE -from .sina import SinaIE -from .skynewsarabia import ( - SkyNewsArabiaIE, - SkyNewsArabiaArticleIE, -) -from .slideshare import SlideshareIE -from .slutload import SlutloadIE -from .smotri import ( - SmotriIE, - SmotriCommunityIE, - SmotriUserIE, - SmotriBroadcastIE, -) -from .snagfilms import ( - SnagFilmsIE, - SnagFilmsEmbedIE, -) -from .snotr import SnotrIE -from .sohu import SohuIE -from .soundcloud import ( - SoundcloudIE, - SoundcloudSetIE, - SoundcloudUserIE, - SoundcloudPlaylistIE, - SoundcloudSearchIE -) -from .soundgasm import ( - SoundgasmIE, - SoundgasmProfileIE -) -from .southpark import ( - SouthParkIE, - SouthParkDeIE, - SouthParkDkIE, - SouthParkEsIE, - SouthParkNlIE -) -from .spankbang import SpankBangIE -from .spankwire import SpankwireIE -from .spiegel import SpiegelIE, SpiegelArticleIE -from .spiegeltv import SpiegeltvIE -from .spike import SpikeIE -from .stitcher import StitcherIE -from .sport5 import Sport5IE -from .sportbox import ( - SportBoxIE, - SportBoxEmbedIE, -) -from .sportdeutschland import SportDeutschlandIE -from .srgssr import ( - SRGSSRIE, - SRGSSRPlayIE, -) -from .srmediathek import SRMediathekIE -from .ssa import SSAIE -from .stanfordoc import StanfordOpenClassroomIE -from .steam import SteamIE -from .streamcloud import StreamcloudIE -from .streamcz import StreamCZIE -from .streetvoice import StreetVoiceIE -from .sunporno import SunPornoIE -from .svt import ( - SVTIE, - SVTPlayIE, -) -from .swrmediathek import SWRMediathekIE -from .syfy import SyfyIE -from .sztvhu import SztvHuIE -from .tagesschau import TagesschauIE -from .tapely import TapelyIE -from .tass import TassIE -from .teachertube import ( - TeacherTubeIE, - TeacherTubeUserIE, -) -from .teachingchannel import TeachingChannelIE -from .teamcoco import TeamcocoIE -from .techtalks import TechTalksIE -from .ted import TEDIE -from .tele13 import Tele13IE -from .telebruxelles import TeleBruxellesIE -from .telecinco import TelecincoIE -from .telegraaf import TelegraafIE -from .telemb import TeleMBIE -from .teletask import TeleTaskIE -from .testurl import TestURLIE -from .tf1 import TF1IE -from .theintercept import TheInterceptIE -from .theonion import TheOnionIE -from .theplatform import ( - ThePlatformIE, - ThePlatformFeedIE, -) -from .thescene import TheSceneIE -from .thesixtyone import TheSixtyOneIE -from .thestar import TheStarIE -from .thisamericanlife import ThisAmericanLifeIE -from .thisav import ThisAVIE -from .tinypic import TinyPicIE -from .tlc import TlcDeIE -from .tmz import ( - TMZIE, - TMZArticleIE, -) -from .tnaflix import ( - TNAFlixNetworkEmbedIE, - TNAFlixIE, - EMPFlixIE, - MovieFapIE, -) -from .toggle import ToggleIE -from .thvideo import ( - THVideoIE, - THVideoPlaylistIE -) -from .toutv import TouTvIE -from .toypics import ToypicsUserIE, ToypicsIE -from .traileraddict import TrailerAddictIE -from .trilulilu import TriluliluIE -from .trollvids import TrollvidsIE -from .trutube import TruTubeIE -from .tube8 import Tube8IE -from .tubitv import TubiTvIE -from .tudou import ( - TudouIE, - TudouPlaylistIE, - TudouAlbumIE, -) -from .tumblr import TumblrIE -from .tunein import ( - TuneInClipIE, - TuneInStationIE, - TuneInProgramIE, - TuneInTopicIE, - TuneInShortenerIE, -) -from .turbo import TurboIE -from .tutv import TutvIE -from .tv2 import ( - TV2IE, - TV2ArticleIE, -) -from .tv3 import TV3IE -from .tv4 import TV4IE -from .tvc import ( - TVCIE, - TVCArticleIE, -) -from .tvigle import TvigleIE -from .tvland import TVLandIE -from .tvp import TvpIE, TvpSeriesIE -from .tvplay import TVPlayIE -from .tweakers import TweakersIE -from .twentyfourvideo import TwentyFourVideoIE -from .twentymin import TwentyMinutenIE -from .twentytwotracks import ( - TwentyTwoTracksIE, - TwentyTwoTracksGenreIE -) -from .twitch import ( - TwitchVideoIE, - TwitchChapterIE, - TwitchVodIE, - TwitchProfileIE, - TwitchPastBroadcastsIE, - TwitchBookmarksIE, - TwitchStreamIE, -) -from .twitter import ( - TwitterCardIE, - TwitterIE, - TwitterAmplifyIE, -) -from .ubu import UbuIE -from .udemy import ( - UdemyIE, - UdemyCourseIE -) -from .udn import UDNEmbedIE -from .digiteka import DigitekaIE -from .unistra import UnistraIE -from .urort import UrortIE -from .usatoday import USATodayIE -from .ustream import UstreamIE, UstreamChannelIE -from .ustudio import UstudioIE -from .varzesh3 import Varzesh3IE -from .vbox7 import Vbox7IE -from .veehd import VeeHDIE -from .veoh import VeohIE -from .vessel import VesselIE -from .vesti import VestiIE -from .vevo import VevoIE -from .vgtv import ( - BTArticleIE, - BTVestlendingenIE, - VGTVIE, -) -from .vh1 import VH1IE -from .vice import ( - ViceIE, - ViceShowIE, -) -from .viddler import ViddlerIE -from .videodetective import VideoDetectiveIE -from .videofyme import VideofyMeIE -from .videomega import VideoMegaIE -from .videomore import ( - VideomoreIE, - VideomoreVideoIE, - VideomoreSeasonIE, -) -from .videopremium import VideoPremiumIE -from .videott import VideoTtIE -from .vidme import ( - VidmeIE, - VidmeUserIE, - VidmeUserLikesIE, -) -from .vidzi import VidziIE -from .vier import VierIE, VierVideosIE -from .viewster import ViewsterIE -from .viidea import ViideaIE -from .vimeo import ( - VimeoIE, - VimeoAlbumIE, - VimeoChannelIE, - VimeoGroupsIE, - VimeoLikesIE, - VimeoOndemandIE, - VimeoReviewIE, - VimeoUserIE, - VimeoWatchLaterIE, -) -from .vimple import VimpleIE -from .vine import ( - VineIE, - VineUserIE, -) -from .viki import ( - VikiIE, - VikiChannelIE, -) -from .vk import ( - VKIE, - VKUserVideosIE, -) -from .vlive import VLiveIE -from .vodlocker import VodlockerIE -from .voicerepublic import VoiceRepublicIE -from .voxmedia import VoxMediaIE -from .vporn import VpornIE -from .vrt import VRTIE -from .vube import VubeIE -from .vuclip import VuClipIE -from .vulture import VultureIE -from .walla import WallaIE -from .washingtonpost import WashingtonPostIE -from .wat import WatIE -from .wayofthemaster import WayOfTheMasterIE -from .wdr import ( - WDRIE, - WDRMobileIE, - WDRMausIE, -) -from .webofstories import ( - WebOfStoriesIE, - WebOfStoriesPlaylistIE, -) -from .weibo import WeiboIE -from .weiqitv import WeiqiTVIE -from .wimp import WimpIE -from .wistia import WistiaIE -from .worldstarhiphop import WorldStarHipHopIE -from .wrzuta import WrzutaIE -from .wsj import WSJIE -from .xbef import XBefIE -from .xboxclips import XboxClipsIE -from .xfileshare import XFileShareIE -from .xhamster import ( - XHamsterIE, - XHamsterEmbedIE, -) -from .xminus import XMinusIE -from .xnxx import XNXXIE -from .xstream import XstreamIE -from .xtube import XTubeUserIE, XTubeIE -from .xuite import XuiteIE -from .xvideos import XVideosIE -from .xxxymovies import XXXYMoviesIE -from .yahoo import ( - YahooIE, - YahooSearchIE, -) -from .yam import YamIE -from .yandexmusic import ( - YandexMusicTrackIE, - YandexMusicAlbumIE, - YandexMusicPlaylistIE, -) -from .yesjapan import YesJapanIE -from .yinyuetai import YinYueTaiIE -from .ynet import YnetIE -from .youjizz import YouJizzIE -from .youku import YoukuIE -from .youporn import YouPornIE -from .yourupload import YourUploadIE -from .youtube import ( - YoutubeIE, - YoutubeChannelIE, - YoutubeFavouritesIE, - YoutubeHistoryIE, - YoutubeLiveIE, - YoutubePlaylistIE, - YoutubePlaylistsIE, - YoutubeRecommendedIE, - YoutubeSearchDateIE, - YoutubeSearchIE, - YoutubeSearchURLIE, - YoutubeShowIE, - YoutubeSubscriptionsIE, - YoutubeTruncatedIDIE, - YoutubeTruncatedURLIE, - YoutubeUserIE, - YoutubeWatchLaterIE, -) -from .zapiks import ZapiksIE -from .zdf import ZDFIE, ZDFChannelIE -from .zingmp3 import ( - ZingMp3SongIE, - ZingMp3AlbumIE, -) -from .zippcast import ZippCastIE - -_ALL_CLASSES = [ - klass - for name, klass in globals().items() - if name.endswith('IE') and name != 'GenericIE' -] -_ALL_CLASSES.append(GenericIE) +try: + from .lazy_extractors import * + from .lazy_extractors import _ALL_CLASSES + _LAZY_LOADER = True +except ImportError: + _LAZY_LOADER = False + from .extractors import * + + _ALL_CLASSES = [ + klass + for name, klass in globals().items() + if name.endswith('IE') and name != 'GenericIE' + ] + _ALL_CLASSES.append(GenericIE) + + +def gen_extractor_classes(): + """ Return a list of supported extractors. + The order does matter; the first extractor matched is the one handling the URL. + """ + return _ALL_CLASSES def gen_extractors(): """ Return a list of an instance of every supported extractor. The order does matter; the first extractor matched is the one handling the URL. """ - return [klass() for klass in _ALL_CLASSES] + return [klass() for klass in gen_extractor_classes()] def list_extractors(age_limit): diff --git a/youtube_dl/extractor/acast.py b/youtube_dl/extractor/acast.py index 92eee8119..94ce88c83 100644 --- a/youtube_dl/extractor/acast.py +++ b/youtube_dl/extractor/acast.py @@ -2,10 +2,14 @@ from __future__ import unicode_literals import re +import functools from .common import InfoExtractor from ..compat import compat_str -from ..utils import int_or_none +from ..utils import ( + int_or_none, + OnDemandPagedList, +) class ACastIE(InfoExtractor): @@ -26,13 +30,8 @@ class ACastIE(InfoExtractor): def _real_extract(self, url): channel, display_id = re.match(self._VALID_URL, url).groups() - - embed_page = self._download_webpage( - re.sub('(?:www\.)?acast\.com', 'embedcdn.acast.com', url), display_id) - cast_data = self._parse_json(self._search_regex( - r'window\[\'acast/queries\'\]\s*=\s*([^;]+);', embed_page, 'acast data'), - display_id)['GetAcast/%s/%s' % (channel, display_id)] - + cast_data = self._download_json( + 'https://embed.acast.com/api/acasts/%s/%s' % (channel, display_id), display_id) return { 'id': compat_str(cast_data['id']), 'display_id': display_id, @@ -58,15 +57,26 @@ class ACastChannelIE(InfoExtractor): 'playlist_mincount': 20, } _API_BASE_URL = 'https://www.acast.com/api/' + _PAGE_SIZE = 10 @classmethod def suitable(cls, url): return False if ACastIE.suitable(url) else super(ACastChannelIE, cls).suitable(url) - def _real_extract(self, url): - display_id = self._match_id(url) - channel_data = self._download_json(self._API_BASE_URL + 'channels/%s' % display_id, display_id) - casts = self._download_json(self._API_BASE_URL + 'channels/%s/acasts' % display_id, display_id) - entries = [self.url_result('https://www.acast.com/%s/%s' % (display_id, cast['url']), 'ACast') for cast in casts] + def _fetch_page(self, channel_slug, page): + casts = self._download_json( + self._API_BASE_URL + 'channels/%s/acasts?page=%s' % (channel_slug, page), + channel_slug, note='Download page %d of channel data' % page) + for cast in casts: + yield self.url_result( + 'https://www.acast.com/%s/%s' % (channel_slug, cast['url']), + 'ACast', cast['id']) - return self.playlist_result(entries, compat_str(channel_data['id']), channel_data['name'], channel_data.get('description')) + def _real_extract(self, url): + channel_slug = self._match_id(url) + channel_data = self._download_json( + self._API_BASE_URL + 'channels/%s' % channel_slug, channel_slug) + entries = OnDemandPagedList(functools.partial( + self._fetch_page, channel_slug), self._PAGE_SIZE) + return self.playlist_result(entries, compat_str( + channel_data['id']), channel_data['name'], channel_data.get('description')) diff --git a/youtube_dl/extractor/aol.py b/youtube_dl/extractor/aol.py index 95a99c6b0..24df8fe93 100644 --- a/youtube_dl/extractor/aol.py +++ b/youtube_dl/extractor/aol.py @@ -1,26 +1,107 @@ +# coding: utf-8 from __future__ import unicode_literals +import re + from .common import InfoExtractor +from ..utils import ( + ExtractorError, + int_or_none, +) class AolIE(InfoExtractor): IE_NAME = 'on.aol.com' - _VALID_URL = r'(?:aol-video:|https?://on\.aol\.com/video/.*-)(?P[0-9]+)(?:$|\?)' + _VALID_URL = r'(?:aol-video:|https?://on\.aol\.com/.*-)(?P[^/?-]+)' _TESTS = [{ + # video with 5min ID 'url': 'http://on.aol.com/video/u-s--official-warns-of-largest-ever-irs-phone-scam-518167793?icid=OnHomepageC2Wide_MustSee_Img', 'md5': '18ef68f48740e86ae94b98da815eec42', 'info_dict': { 'id': '518167793', 'ext': 'mp4', 'title': 'U.S. Official Warns Of \'Largest Ever\' IRS Phone Scam', + 'description': 'A major phone scam has cost thousands of taxpayers more than $1 million, with less than a month until income tax returns are due to the IRS.', + 'timestamp': 1395405060, + 'upload_date': '20140321', + 'uploader': 'Newsy Studio', }, - 'add_ie': ['FiveMin'], + 'params': { + # m3u8 download + 'skip_download': True, + } + }, { + # video with vidible ID + 'url': 'http://on.aol.com/video/netflix-is-raising-rates-5707d6b8e4b090497b04f706?context=PC:homepage:PL1944:1460189336183', + 'info_dict': { + 'id': '5707d6b8e4b090497b04f706', + 'ext': 'mp4', + 'title': 'Netflix is Raising Rates', + 'description': 'Netflix is rewarding millions of it’s long-standing members with an increase in cost. Veuer’s Carly Figueroa has more.', + 'upload_date': '20160408', + 'timestamp': 1460123280, + 'uploader': 'Veuer', + }, + 'params': { + # m3u8 download + 'skip_download': True, + } + }, { + 'url': 'http://on.aol.com/partners/abc-551438d309eab105804dbfe8/sneak-peek-was-haley-really-framed-570eaebee4b0448640a5c944', + 'only_matching': True, + }, { + 'url': 'http://on.aol.com/shows/park-bench-shw518173474-559a1b9be4b0c3bfad3357a7?context=SH:SHW518173474:PL4327:1460619712763', + 'only_matching': True, }] def _real_extract(self, url): video_id = self._match_id(url) - return self.url_result('5min:%s' % video_id) + + response = self._download_json( + 'https://feedapi.b2c.on.aol.com/v1.0/app/videos/aolon/%s/details' % video_id, + video_id)['response'] + if response['statusText'] != 'Ok': + raise ExtractorError('%s said: %s' % (self.IE_NAME, response['statusText']), expected=True) + + video_data = response['data'] + formats = [] + m3u8_url = video_data.get('videoMasterPlaylist') + if m3u8_url: + formats.extend(self._extract_m3u8_formats( + m3u8_url, video_id, 'mp4', m3u8_id='hls', fatal=False)) + for rendition in video_data.get('renditions', []): + video_url = rendition.get('url') + if not video_url: + continue + ext = rendition.get('format') + if ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + video_url, video_id, 'mp4', m3u8_id='hls', fatal=False)) + else: + f = { + 'url': video_url, + 'format_id': rendition.get('quality'), + } + mobj = re.search(r'(\d+)x(\d+)', video_url) + if mobj: + f.update({ + 'width': int(mobj.group(1)), + 'height': int(mobj.group(2)), + }) + formats.append(f) + self._sort_formats(formats, ('width', 'height', 'tbr', 'format_id')) + + return { + 'id': video_id, + 'title': video_data['title'], + 'duration': int_or_none(video_data.get('duration')), + 'timestamp': int_or_none(video_data.get('publishDate')), + 'view_count': int_or_none(video_data.get('views')), + 'description': video_data.get('description'), + 'uploader': video_data.get('videoOwner'), + 'formats': formats, + } class AolFeaturesIE(InfoExtractor): diff --git a/youtube_dl/extractor/ard.py b/youtube_dl/extractor/ard.py index 9fb84911a..26446c2fe 100644 --- a/youtube_dl/extractor/ard.py +++ b/youtube_dl/extractor/ard.py @@ -83,7 +83,7 @@ class ARDMediathekIE(InfoExtractor): subtitle_url = media_info.get('_subtitleUrl') if subtitle_url: subtitles['de'] = [{ - 'ext': 'srt', + 'ext': 'ttml', 'url': subtitle_url, }] diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py index ae0f27dcb..a9e3266dc 100644 --- a/youtube_dl/extractor/arte.py +++ b/youtube_dl/extractor/arte.py @@ -210,7 +210,7 @@ class ArteTVPlus7IE(InfoExtractor): # It also uses the arte_vp_url url from the webpage to extract the information class ArteTVCreativeIE(ArteTVPlus7IE): IE_NAME = 'arte.tv:creative' - _VALID_URL = r'https?://creative\.arte\.tv/(?Pfr|de|en|es)/(?:magazine?/)?(?P[^/?#&]+)' + _VALID_URL = r'https?://creative\.arte\.tv/(?Pfr|de|en|es)/(?:[^/]+/)*(?P[^/?#&]+)' _TESTS = [{ 'url': 'http://creative.arte.tv/de/magazin/agentur-amateur-corporate-design', @@ -229,9 +229,27 @@ class ArteTVCreativeIE(ArteTVPlus7IE): 'description': 'Événement ! Quarante-cinq ans après leurs premiers succès, les légendaires Monty Python remontent sur scène.\n', 'upload_date': '20140805', } + }, { + 'url': 'http://creative.arte.tv/de/episode/agentur-amateur-4-der-erste-kunde', + 'only_matching': True, }] +class ArteTVInfoIE(ArteTVPlus7IE): + IE_NAME = 'arte.tv:info' + _VALID_URL = r'https?://info\.arte\.tv/(?Pfr|de|en|es)/(?:[^/]+/)*(?P[^/?#&]+)' + + _TEST = { + 'url': 'http://info.arte.tv/fr/service-civique-un-cache-misere', + 'info_dict': { + 'id': '067528-000-A', + 'ext': 'mp4', + 'title': 'Service civique, un cache misère ?', + 'upload_date': '20160403', + }, + } + + class ArteTVFutureIE(ArteTVPlus7IE): IE_NAME = 'arte.tv:future' _VALID_URL = r'https?://future\.arte\.tv/(?Pfr|de|en|es)/(?P[^/?#&]+)' @@ -337,7 +355,7 @@ class ArteTVEmbedIE(ArteTVPlus7IE): IE_NAME = 'arte.tv:embed' _VALID_URL = r'''(?x) http://www\.arte\.tv - /playerv2/embed\.php\?json_url= + /(?:playerv2/embed|arte_vp/index)\.php\?json_url= (?P http://arte\.tv/papi/tvguide/videos/stream/player/ (?P[^/]+)/(?P[^/]+)[^&]* diff --git a/youtube_dl/extractor/audiomack.py b/youtube_dl/extractor/audiomack.py index 3eed91279..a52d26cec 100644 --- a/youtube_dl/extractor/audiomack.py +++ b/youtube_dl/extractor/audiomack.py @@ -30,14 +30,14 @@ class AudiomackIE(InfoExtractor): # audiomack wrapper around soundcloud song { 'add_ie': ['Soundcloud'], - 'url': 'http://www.audiomack.com/song/xclusiveszone/take-kare', + 'url': 'http://www.audiomack.com/song/hip-hop-daily/black-mamba-freestyle', 'info_dict': { - 'id': '172419696', + 'id': '258901379', 'ext': 'mp3', - 'description': 'md5:1fc3272ed7a635cce5be1568c2822997', - 'title': 'Young Thug ft Lil Wayne - Take Kare', - 'uploader': 'Young Thug World', - 'upload_date': '20141016', + 'description': 'mamba day freestyle for the legend Kobe Bryant ', + 'title': 'Black Mamba Freestyle [Prod. By Danny Wolf]', + 'uploader': 'ILOVEMAKONNEN', + 'upload_date': '20160414', } }, ] diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py index 425f08f2b..74c4510f9 100644 --- a/youtube_dl/extractor/bbc.py +++ b/youtube_dl/extractor/bbc.py @@ -671,6 +671,7 @@ class BBCIE(BBCCoUkIE): 'info_dict': { 'id': '34475836', 'title': 'Jurgen Klopp: Furious football from a witty and winning coach', + 'description': 'Fast-paced football, wit, wisdom and a ready smile - why Liverpool fans should come to love new boss Jurgen Klopp.', }, 'playlist_count': 3, }, { diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index c718cf385..f0781fc27 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -340,7 +340,7 @@ class BrightcoveLegacyIE(InfoExtractor): ext = 'flv' if ext is None: ext = determine_ext(url) - tbr = int_or_none(rend.get('encodingRate'), 1000), + tbr = int_or_none(rend.get('encodingRate'), 1000) a_format = { 'format_id': 'http%s' % ('-%s' % tbr if tbr else ''), 'url': url, diff --git a/youtube_dl/extractor/cbs.py b/youtube_dl/extractor/cbs.py index c621a08d5..051d783a2 100644 --- a/youtube_dl/extractor/cbs.py +++ b/youtube_dl/extractor/cbs.py @@ -5,7 +5,6 @@ from ..utils import ( xpath_text, xpath_element, int_or_none, - ExtractorError, find_xpath_attr, ) @@ -64,7 +63,7 @@ class CBSIE(CBSBaseIE): 'url': 'http://www.colbertlateshow.com/podcasts/dYSwjqPs_X1tvbV_P2FcPWRa_qT6akTC/in-the-bad-room-with-stephen/', 'only_matching': True, }] - TP_RELEASE_URL_TEMPLATE = 'http://link.theplatform.com/s/dJ5BDC/%s?manifest=m3u&mbr=true' + TP_RELEASE_URL_TEMPLATE = 'http://link.theplatform.com/s/dJ5BDC/%s?mbr=true' def _real_extract(self, url): display_id = self._match_id(url) @@ -84,11 +83,11 @@ class CBSIE(CBSBaseIE): pid = xpath_text(item, 'pid') if not pid: continue - try: - tp_formats, tp_subtitles = self._extract_theplatform_smil( - self.TP_RELEASE_URL_TEMPLATE % pid, content_id, 'Downloading %s SMIL data' % pid) - except ExtractorError: - continue + tp_release_url = self.TP_RELEASE_URL_TEMPLATE % pid + if '.m3u8' in xpath_text(item, 'contentUrl', default=''): + tp_release_url += '&manifest=m3u' + tp_formats, tp_subtitles = self._extract_theplatform_smil( + tp_release_url, content_id, 'Downloading %s SMIL data' % pid) formats.extend(tp_formats) subtitles = self._merge_subtitles(subtitles, tp_subtitles) self._sort_formats(formats) diff --git a/youtube_dl/extractor/cliprs.py b/youtube_dl/extractor/cliprs.py new file mode 100644 index 000000000..4f9320ea5 --- /dev/null +++ b/youtube_dl/extractor/cliprs.py @@ -0,0 +1,90 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + float_or_none, + int_or_none, + parse_iso8601, +) + + +class ClipRsIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?clip\.rs/(?P[^/]+)/\d+' + _TEST = { + 'url': 'http://www.clip.rs/premijera-frajle-predstavljaju-novi-spot-za-pesmu-moli-me-moli/3732', + 'md5': 'c412d57815ba07b56f9edc7b5d6a14e5', + 'info_dict': { + 'id': '1488842.1399140381', + 'ext': 'mp4', + 'title': 'PREMIJERA Frajle predstavljaju novi spot za pesmu Moli me, moli', + 'description': 'md5:56ce2c3b4ab31c5a2e0b17cb9a453026', + 'duration': 229, + 'timestamp': 1459850243, + 'upload_date': '20160405', + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + video_id = self._search_regex( + r'id=(["\'])mvp:(?P.+?)\1', webpage, 'mvp id', group='id') + + response = self._download_json( + 'http://qi.ckm.onetapi.pl/', video_id, + query={ + 'body[id]': video_id, + 'body[jsonrpc]': '2.0', + 'body[method]': 'get_asset_detail', + 'body[params][ID_Publikacji]': video_id, + 'body[params][Service]': 'www.onet.pl', + 'content-type': 'application/jsonp', + 'x-onet-app': 'player.front.onetapi.pl', + }) + + error = response.get('error') + if error: + raise ExtractorError( + '%s said: %s' % (self.IE_NAME, error['message']), expected=True) + + video = response['result'].get('0') + + formats = [] + for _, formats_dict in video['formats'].items(): + if not isinstance(formats_dict, dict): + continue + for format_id, format_list in formats_dict.items(): + if not isinstance(format_list, list): + continue + for f in format_list: + if not f.get('url'): + continue + formats.append({ + 'url': f['url'], + 'format_id': format_id, + 'height': int_or_none(f.get('vertical_resolution')), + 'width': int_or_none(f.get('horizontal_resolution')), + 'abr': float_or_none(f.get('audio_bitrate')), + 'vbr': float_or_none(f.get('video_bitrate')), + }) + self._sort_formats(formats) + + meta = video.get('meta', {}) + + title = self._og_search_title(webpage, default=None) or meta['title'] + description = self._og_search_description(webpage, default=None) or meta.get('description') + duration = meta.get('length') or meta.get('lenght') + timestamp = parse_iso8601(meta.get('addDate'), ' ') + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'duration': duration, + 'timestamp': timestamp, + 'formats': formats, + } diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 2b40f3b7c..02cd2c003 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -376,14 +376,13 @@ class InfoExtractor(object): self.to_screen('%s' % (note,)) else: self.to_screen('%s: %s' % (video_id, note)) - # data, headers and query params will be ignored for `Request` objects if isinstance(url_or_request, compat_urllib_request.Request): url_or_request = update_Request( url_or_request, data=data, headers=headers, query=query) else: if query: url_or_request = update_url_query(url_or_request, query) - if data or headers: + if data is not None or headers: url_or_request = sanitized_Request(url_or_request, data, headers) try: return self._downloader.urlopen(url_or_request) @@ -843,7 +842,7 @@ class InfoExtractor(object): for input in re.findall(r'(?i)]+)>', html): if not re.search(r'type=(["\'])(?:hidden|submit)\1', input): continue - name = re.search(r'name=(["\'])(?P.+?)\1', input) + name = re.search(r'(?:name|id)=(["\'])(?P.+?)\1', input) if not name: continue value = re.search(r'value=(["\'])(?P.*?)\1', input) @@ -1534,7 +1533,7 @@ class InfoExtractor(object): media_template = representation_ms_info['media_template'] media_template = media_template.replace('$RepresentationID$', representation_id) media_template = re.sub(r'\$(Number|Bandwidth)\$', r'%(\1)d', media_template) - media_template = re.sub(r'\$(Number|Bandwidth)%(\d+)\$', r'%(\1)\2d', media_template) + media_template = re.sub(r'\$(Number|Bandwidth)%([^$]+)\$', r'%(\1)\2', media_template) media_template.replace('$$', '$') representation_ms_info['segment_urls'] = [ media_template % { diff --git a/youtube_dl/extractor/democracynow.py b/youtube_dl/extractor/democracynow.py index 6cd395e11..65a98d789 100644 --- a/youtube_dl/extractor/democracynow.py +++ b/youtube_dl/extractor/democracynow.py @@ -17,37 +17,53 @@ class DemocracynowIE(InfoExtractor): IE_NAME = 'democracynow' _TESTS = [{ 'url': 'http://www.democracynow.org/shows/2015/7/3', - 'md5': 'fbb8fe3d7a56a5e12431ce2f9b2fab0d', + 'md5': '3757c182d3d84da68f5c8f506c18c196', 'info_dict': { 'id': '2015-0703-001', 'ext': 'mp4', - 'title': 'July 03, 2015 - Democracy Now!', - 'description': 'A daily independent global news hour with Amy Goodman & Juan González "What to the Slave is 4th of July?": James Earl Jones Reads Frederick Douglass\u2019 Historic Speech : "This Flag Comes Down Today": Bree Newsome Scales SC Capitol Flagpole, Takes Down Confederate Flag : "We Shall Overcome": Remembering Folk Icon, Activist Pete Seeger in His Own Words & Songs', + 'title': 'Daily Show', }, }, { 'url': 'http://www.democracynow.org/2015/7/3/this_flag_comes_down_today_bree', - 'md5': 'fbb8fe3d7a56a5e12431ce2f9b2fab0d', 'info_dict': { 'id': '2015-0703-001', 'ext': 'mp4', 'title': '"This Flag Comes Down Today": Bree Newsome Scales SC Capitol Flagpole, Takes Down Confederate Flag', 'description': 'md5:4d2bc4f0d29f5553c2210a4bc7761a21', }, + 'params': { + 'skip_download': True, + }, }] def _real_extract(self, url): display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) - description = self._og_search_description(webpage) json_data = self._parse_json(self._search_regex( r']+type="text/json"[^>]*>\s*({[^>]+})', webpage, 'json'), display_id) - video_id = None + + title = json_data['title'] formats = [] - default_lang = 'en' + video_id = None + + for key in ('file', 'audio', 'video', 'high_res_video'): + media_url = json_data.get(key, '') + if not media_url: + continue + media_url = re.sub(r'\?.*', '', compat_urlparse.urljoin(url, media_url)) + video_id = video_id or remove_start(os.path.splitext(url_basename(media_url))[0], 'dn') + formats.append({ + 'url': media_url, + 'vcodec': 'none' if key == 'audio' else None, + }) + + self._sort_formats(formats) + default_lang = 'en' subtitles = {} def add_subtitle_item(lang, info_dict): @@ -67,22 +83,13 @@ class DemocracynowIE(InfoExtractor): 'url': compat_urlparse.urljoin(url, subtitle_item['url']), }) - for key in ('file', 'audio', 'video'): - media_url = json_data.get(key, '') - if not media_url: - continue - media_url = re.sub(r'\?.*', '', compat_urlparse.urljoin(url, media_url)) - video_id = video_id or remove_start(os.path.splitext(url_basename(media_url))[0], 'dn') - formats.append({ - 'url': media_url, - }) - - self._sort_formats(formats) + description = self._og_search_description(webpage, default=None) return { 'id': video_id or display_id, - 'title': json_data['title'], + 'title': title, 'description': description, + 'thumbnail': json_data.get('image'), 'subtitles': subtitles, 'formats': formats, } diff --git a/youtube_dl/extractor/dispeak.py b/youtube_dl/extractor/dispeak.py new file mode 100644 index 000000000..c74fa7e07 --- /dev/null +++ b/youtube_dl/extractor/dispeak.py @@ -0,0 +1,114 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + int_or_none, + parse_duration, + remove_end, + xpath_element, + xpath_text, +) + + +class DigitalSpeakingIE(InfoExtractor): + _VALID_URL = r'http://(?:evt\.dispeak|events\.digitallyspeaking)\.com/([^/]+/)+xml/(?P[^.]+).xml' + + _TESTS = [{ + # From http://evt.dispeak.com/ubm/gdc/sf16/xml/840376_BQRC.xml + 'url': 'http://evt.dispeak.com/ubm/gdc/sf16/xml/840376_BQRC.xml', + 'md5': 'a8efb6c31ed06ca8739294960b2dbabd', + 'info_dict': { + 'id': '840376_BQRC', + 'ext': 'mp4', + 'title': 'Tenacious Design and The Interface of \'Destiny\'', + }, + }, { + # From http://www.gdcvault.com/play/1014631/Classic-Game-Postmortem-PAC + 'url': 'http://events.digitallyspeaking.com/gdc/sf11/xml/12396_1299111843500GMPX.xml', + 'only_matching': True, + }] + + def _parse_mp4(self, metadata): + video_formats = [] + video_root = None + + mp4_video = xpath_text(metadata, './mp4video', default=None) + if mp4_video is not None: + mobj = re.match(r'(?Phttps?://.*?/).*', mp4_video) + video_root = mobj.group('root') + if video_root is None: + http_host = xpath_text(metadata, 'httpHost', default=None) + if http_host: + video_root = 'http://%s/' % http_host + if video_root is None: + # Hard-coded in http://evt.dispeak.com/ubm/gdc/sf16/custom/player2.js + # Works for GPUTechConf, too + video_root = 'http://s3-2u.digitallyspeaking.com/' + + formats = metadata.findall('./MBRVideos/MBRVideo') + if not formats: + return None + for a_format in formats: + stream_name = xpath_text(a_format, 'streamName', fatal=True) + video_path = re.match(r'mp4\:(?P.*)', stream_name).group('path') + url = video_root + video_path + vbr = xpath_text(a_format, 'bitrate') + video_formats.append({ + 'url': url, + 'vbr': int_or_none(vbr), + }) + return video_formats + + def _parse_flv(self, metadata): + formats = [] + akamai_url = xpath_text(metadata, './akamaiHost', fatal=True) + audios = metadata.findall('./audios/audio') + for audio in audios: + formats.append({ + 'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url, + 'play_path': remove_end(audio.get('url'), '.flv'), + 'ext': 'flv', + 'vcodec': 'none', + 'format_id': audio.get('code'), + }) + slide_video_path = xpath_text(metadata, './slideVideo', fatal=True) + formats.append({ + 'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url, + 'play_path': remove_end(slide_video_path, '.flv'), + 'ext': 'flv', + 'format_note': 'slide deck video', + 'quality': -2, + 'preference': -2, + 'format_id': 'slides', + }) + speaker_video_path = xpath_text(metadata, './speakerVideo', fatal=True) + formats.append({ + 'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url, + 'play_path': remove_end(speaker_video_path, '.flv'), + 'ext': 'flv', + 'format_note': 'speaker video', + 'quality': -1, + 'preference': -1, + 'format_id': 'speaker', + }) + return formats + + def _real_extract(self, url): + video_id = self._match_id(url) + + xml_description = self._download_xml(url, video_id) + metadata = xpath_element(xml_description, 'metadata') + + video_formats = self._parse_mp4(metadata) + if video_formats is None: + video_formats = self._parse_flv(metadata) + + return { + 'id': video_id, + 'formats': video_formats, + 'title': xpath_text(metadata, 'title', fatal=True), + 'duration': parse_duration(xpath_text(metadata, 'endTime')), + 'creator': xpath_text(metadata, 'speaker'), + } diff --git a/youtube_dl/extractor/eagleplatform.py b/youtube_dl/extractor/eagleplatform.py index 7bbf617d4..fa3cb7023 100644 --- a/youtube_dl/extractor/eagleplatform.py +++ b/youtube_dl/extractor/eagleplatform.py @@ -4,6 +4,7 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..compat import compat_HTTPError from ..utils import ( ExtractorError, int_or_none, @@ -55,8 +56,13 @@ class EaglePlatformIE(InfoExtractor): raise ExtractorError(' '.join(response['errors']), expected=True) def _download_json(self, url_or_request, video_id, note='Downloading JSON metadata'): - response = super(EaglePlatformIE, self)._download_json(url_or_request, video_id, note) - self._handle_error(response) + try: + response = super(EaglePlatformIE, self)._download_json(url_or_request, video_id, note) + except ExtractorError as ee: + if isinstance(ee.cause, compat_HTTPError): + response = self._parse_json(ee.cause.read().decode('utf-8'), video_id) + self._handle_error(response) + raise return response def _get_video_url(self, url_or_request, video_id, note='Downloading JSON metadata'): diff --git a/youtube_dl/extractor/ebaumsworld.py b/youtube_dl/extractor/ebaumsworld.py index b6bfd2b2d..c97682cd3 100644 --- a/youtube_dl/extractor/ebaumsworld.py +++ b/youtube_dl/extractor/ebaumsworld.py @@ -4,10 +4,10 @@ from .common import InfoExtractor class EbaumsWorldIE(InfoExtractor): - _VALID_URL = r'https?://www\.ebaumsworld\.com/video/watch/(?P\d+)' + _VALID_URL = r'https?://(?:www\.)?ebaumsworld\.com/videos/[^/]+/(?P\d+)' _TEST = { - 'url': 'http://www.ebaumsworld.com/video/watch/83367677/', + 'url': 'http://www.ebaumsworld.com/videos/a-giant-python-opens-the-door/83367677/', 'info_dict': { 'id': '83367677', 'ext': 'mp4', diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py new file mode 100644 index 000000000..d28f9e863 --- /dev/null +++ b/youtube_dl/extractor/extractors.py @@ -0,0 +1,996 @@ +# flake8: noqa +from __future__ import unicode_literals + +from .abc import ABCIE +from .abc7news import Abc7NewsIE +from .academicearth import AcademicEarthCourseIE +from .acast import ( + ACastIE, + ACastChannelIE, +) +from .addanime import AddAnimeIE +from .adobetv import ( + AdobeTVIE, + AdobeTVShowIE, + AdobeTVChannelIE, + AdobeTVVideoIE, +) +from .adultswim import AdultSwimIE +from .aenetworks import AENetworksIE +from .aftonbladet import AftonbladetIE +from .airmozilla import AirMozillaIE +from .aljazeera import AlJazeeraIE +from .alphaporno import AlphaPornoIE +from .animeondemand import AnimeOnDemandIE +from .anitube import AnitubeIE +from .anysex import AnySexIE +from .aol import ( + AolIE, + AolFeaturesIE, +) +from .allocine import AllocineIE +from .aparat import AparatIE +from .appleconnect import AppleConnectIE +from .appletrailers import ( + AppleTrailersIE, + AppleTrailersSectionIE, +) +from .archiveorg import ArchiveOrgIE +from .ard import ( + ARDIE, + ARDMediathekIE, + SportschauIE, +) +from .arte import ( + ArteTvIE, + ArteTVPlus7IE, + ArteTVCreativeIE, + ArteTVConcertIE, + ArteTVInfoIE, + ArteTVFutureIE, + ArteTVCinemaIE, + ArteTVDDCIE, + ArteTVMagazineIE, + ArteTVEmbedIE, +) +from .atresplayer import AtresPlayerIE +from .atttechchannel import ATTTechChannelIE +from .audimedia import AudiMediaIE +from .audioboom import AudioBoomIE +from .audiomack import AudiomackIE, AudiomackAlbumIE +from .azubu import AzubuIE, AzubuLiveIE +from .baidu import BaiduVideoIE +from .bambuser import BambuserIE, BambuserChannelIE +from .bandcamp import BandcampIE, BandcampAlbumIE +from .bbc import ( + BBCCoUkIE, + BBCCoUkArticleIE, + BBCIE, +) +from .beeg import BeegIE +from .behindkink import BehindKinkIE +from .beatportpro import BeatportProIE +from .bet import BetIE +from .bigflix import BigflixIE +from .bild import BildIE +from .bilibili import BiliBiliIE +from .biobiochiletv import BioBioChileTVIE +from .bleacherreport import ( + BleacherReportIE, + BleacherReportCMSIE, +) +from .blinkx import BlinkxIE +from .bloomberg import BloombergIE +from .bokecc import BokeCCIE +from .bpb import BpbIE +from .br import BRIE +from .bravotv import BravoTVIE +from .breakcom import BreakIE +from .brightcove import ( + BrightcoveLegacyIE, + BrightcoveNewIE, +) +from .buzzfeed import BuzzFeedIE +from .byutv import BYUtvIE +from .c56 import C56IE +from .camdemy import ( + CamdemyIE, + CamdemyFolderIE +) +from .camwithher import CamWithHerIE +from .canalplus import CanalplusIE +from .canalc2 import Canalc2IE +from .canvas import CanvasIE +from .cbc import ( + CBCIE, + CBCPlayerIE, +) +from .cbs import CBSIE +from .cbsinteractive import CBSInteractiveIE +from .cbsnews import ( + CBSNewsIE, + CBSNewsLiveVideoIE, +) +from .cbssports import CBSSportsIE +from .ccc import CCCIE +from .cda import CDAIE +from .ceskatelevize import CeskaTelevizeIE +from .channel9 import Channel9IE +from .chaturbate import ChaturbateIE +from .chilloutzone import ChilloutzoneIE +from .chirbit import ( + ChirbitIE, + ChirbitProfileIE, +) +from .cinchcast import CinchcastIE +from .cinemassacre import CinemassacreIE +from .cliprs import ClipRsIE +from .clipfish import ClipfishIE +from .cliphunter import CliphunterIE +from .clipsyndicate import ClipsyndicateIE +from .cloudy import CloudyIE +from .clubic import ClubicIE +from .clyp import ClypIE +from .cmt import CMTIE +from .cnbc import CNBCIE +from .cnn import ( + CNNIE, + CNNBlogsIE, + CNNArticleIE, +) +from .collegehumor import CollegeHumorIE +from .collegerama import CollegeRamaIE +from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE +from .comcarcoff import ComCarCoffIE +from .commonmistakes import CommonMistakesIE, UnicodeBOMIE +from .commonprotocols import RtmpIE +from .condenast import CondeNastIE +from .cracked import CrackedIE +from .crackle import CrackleIE +from .criterion import CriterionIE +from .crooksandliars import CrooksAndLiarsIE +from .crunchyroll import ( + CrunchyrollIE, + CrunchyrollShowPlaylistIE +) +from .cspan import CSpanIE +from .ctsnews import CtsNewsIE +from .cultureunplugged import CultureUnpluggedIE +from .cwtv import CWTVIE +from .dailymotion import ( + DailymotionIE, + DailymotionPlaylistIE, + DailymotionUserIE, + DailymotionCloudIE, +) +from .daum import ( + DaumIE, + DaumClipIE, + DaumPlaylistIE, + DaumUserIE, +) +from .dbtv import DBTVIE +from .dcn import ( + DCNIE, + DCNVideoIE, + DCNLiveIE, + DCNSeasonIE, +) +from .dctp import DctpTvIE +from .deezer import DeezerPlaylistIE +from .democracynow import DemocracynowIE +from .dfb import DFBIE +from .dhm import DHMIE +from .dotsub import DotsubIE +from .douyutv import DouyuTVIE +from .dplay import DPlayIE +from .dramafever import ( + DramaFeverIE, + DramaFeverSeriesIE, +) +from .dreisat import DreiSatIE +from .drbonanza import DRBonanzaIE +from .drtuber import DrTuberIE +from .drtv import DRTVIE +from .dvtv import DVTVIE +from .dump import DumpIE +from .dumpert import DumpertIE +from .defense import DefenseGouvFrIE +from .discovery import DiscoveryIE +from .dispeak import DigitalSpeakingIE +from .dropbox import DropboxIE +from .dw import ( + DWIE, + DWArticleIE, +) +from .eagleplatform import EaglePlatformIE +from .ebaumsworld import EbaumsWorldIE +from .echomsk import EchoMskIE +from .ehow import EHowIE +from .eighttracks import EightTracksIE +from .einthusan import EinthusanIE +from .eitb import EitbIE +from .ellentv import ( + EllenTVIE, + EllenTVClipsIE, +) +from .elpais import ElPaisIE +from .embedly import EmbedlyIE +from .engadget import EngadgetIE +from .eporner import EpornerIE +from .eroprofile import EroProfileIE +from .escapist import EscapistIE +from .espn import ESPNIE +from .esri import EsriVideoIE +from .europa import EuropaIE +from .everyonesmixtape import EveryonesMixtapeIE +from .exfm import ExfmIE +from .expotv import ExpoTVIE +from .extremetube import ExtremeTubeIE +from .facebook import FacebookIE +from .faz import FazIE +from .fc2 import FC2IE +from .fczenit import FczenitIE +from .firstpost import FirstpostIE +from .firsttv import FirstTVIE +from .fivemin import FiveMinIE +from .fivetv import FiveTVIE +from .fktv import FKTVIE +from .flickr import FlickrIE +from .folketinget import FolketingetIE +from .footyroom import FootyRoomIE +from .fourtube import FourTubeIE +from .fox import FOXIE +from .foxgay import FoxgayIE +from .foxnews import FoxNewsIE +from .foxsports import FoxSportsIE +from .franceculture import ( + FranceCultureIE, + FranceCultureEmissionIE, +) +from .franceinter import FranceInterIE +from .francetv import ( + PluzzIE, + FranceTvInfoIE, + FranceTVIE, + GenerationQuoiIE, + CultureboxIE, +) +from .freesound import FreesoundIE +from .freespeech import FreespeechIE +from .freevideo import FreeVideoIE +from .funimation import FunimationIE +from .funnyordie import FunnyOrDieIE +from .gameinformer import GameInformerIE +from .gamekings import GamekingsIE +from .gameone import ( + GameOneIE, + GameOnePlaylistIE, +) +from .gamersyde import GamersydeIE +from .gamespot import GameSpotIE +from .gamestar import GameStarIE +from .gametrailers import GametrailersIE +from .gazeta import GazetaIE +from .gdcvault import GDCVaultIE +from .generic import GenericIE +from .gfycat import GfycatIE +from .giantbomb import GiantBombIE +from .giga import GigaIE +from .glide import GlideIE +from .globo import ( + GloboIE, + GloboArticleIE, +) +from .godtube import GodTubeIE +from .goldenmoustache import GoldenMoustacheIE +from .golem import GolemIE +from .googledrive import GoogleDriveIE +from .googleplus import GooglePlusIE +from .googlesearch import GoogleSearchIE +from .goshgay import GoshgayIE +from .gputechconf import GPUTechConfIE +from .groupon import GrouponIE +from .hark import HarkIE +from .hbo import HBOIE +from .hearthisat import HearThisAtIE +from .heise import HeiseIE +from .hellporno import HellPornoIE +from .helsinki import HelsinkiIE +from .hentaistigma import HentaiStigmaIE +from .historicfilms import HistoricFilmsIE +from .hitbox import HitboxIE, HitboxLiveIE +from .hornbunny import HornBunnyIE +from .hotnewhiphop import HotNewHipHopIE +from .hotstar import HotStarIE +from .howcast import HowcastIE +from .howstuffworks import HowStuffWorksIE +from .huffpost import HuffPostIE +from .hypem import HypemIE +from .iconosquare import IconosquareIE +from .ign import ( + IGNIE, + OneUPIE, + PCMagIE, +) +from .imdb import ( + ImdbIE, + ImdbListIE +) +from .imgur import ( + ImgurIE, + ImgurAlbumIE, +) +from .ina import InaIE +from .indavideo import ( + IndavideoIE, + IndavideoEmbedIE, +) +from .infoq import InfoQIE +from .instagram import InstagramIE, InstagramUserIE +from .internetvideoarchive import InternetVideoArchiveIE +from .iprima import IPrimaIE +from .iqiyi import IqiyiIE +from .ir90tv import Ir90TvIE +from .ivi import ( + IviIE, + IviCompilationIE +) +from .ivideon import IvideonIE +from .izlesene import IzleseneIE +from .jeuxvideo import JeuxVideoIE +from .jove import JoveIE +from .jwplatform import JWPlatformIE +from .jpopsukitv import JpopsukiIE +from .kaltura import KalturaIE +from .kanalplay import KanalPlayIE +from .kankan import KankanIE +from .karaoketv import KaraoketvIE +from .karrierevideos import KarriereVideosIE +from .keezmovies import KeezMoviesIE +from .khanacademy import KhanAcademyIE +from .kickstarter import KickStarterIE +from .keek import KeekIE +from .konserthusetplay import KonserthusetPlayIE +from .kontrtube import KontrTubeIE +from .krasview import KrasViewIE +from .ku6 import Ku6IE +from .kusi import KUSIIE +from .kuwo import ( + KuwoIE, + KuwoAlbumIE, + KuwoChartIE, + KuwoSingerIE, + KuwoCategoryIE, + KuwoMvIE, +) +from .la7 import LA7IE +from .laola1tv import Laola1TvIE +from .lecture2go import Lecture2GoIE +from .lemonde import LemondeIE +from .leeco import ( + LeIE, + LePlaylistIE, + LetvCloudIE, +) +from .libsyn import LibsynIE +from .lifenews import ( + LifeNewsIE, + LifeEmbedIE, +) +from .limelight import ( + LimelightMediaIE, + LimelightChannelIE, + LimelightChannelListIE, +) +from .liveleak import LiveLeakIE +from .livestream import ( + LivestreamIE, + LivestreamOriginalIE, + LivestreamShortenerIE, +) +from .lnkgo import LnkGoIE +from .lovehomeporn import LoveHomePornIE +from .lrt import LRTIE +from .lynda import ( + LyndaIE, + LyndaCourseIE +) +from .m6 import M6IE +from .macgamestore import MacGameStoreIE +from .mailru import MailRuIE +from .makerschannel import MakersChannelIE +from .makertv import MakerTVIE +from .malemotion import MalemotionIE +from .matchtv import MatchTVIE +from .mdr import MDRIE +from .metacafe import MetacafeIE +from .metacritic import MetacriticIE +from .mgoon import MgoonIE +from .mgtv import MGTVIE +from .minhateca import MinhatecaIE +from .ministrygrid import MinistryGridIE +from .minoto import MinotoIE +from .miomio import MioMioIE +from .mit import TechTVMITIE, MITIE, OCWMITIE +from .mitele import MiTeleIE +from .mixcloud import ( + MixcloudIE, + MixcloudUserIE, + MixcloudPlaylistIE, + MixcloudStreamIE, +) +from .mlb import MLBIE +from .mnet import MnetIE +from .mpora import MporaIE +from .moevideo import MoeVideoIE +from .mofosex import MofosexIE +from .mojvideo import MojvideoIE +from .moniker import MonikerIE +from .morningstar import MorningstarIE +from .motherless import MotherlessIE +from .motorsport import MotorsportIE +from .movieclips import MovieClipsIE +from .moviezine import MoviezineIE +from .mtv import ( + MTVIE, + MTVServicesEmbeddedIE, + MTVIggyIE, + MTVDEIE, +) +from .muenchentv import MuenchenTVIE +from .musicplayon import MusicPlayOnIE +from .muzu import MuzuTVIE +from .mwave import MwaveIE +from .myspace import MySpaceIE, MySpaceAlbumIE +from .myspass import MySpassIE +from .myvi import MyviIE +from .myvideo import MyVideoIE +from .myvidster import MyVidsterIE +from .nationalgeographic import ( + NationalGeographicIE, + NationalGeographicChannelIE, +) +from .naver import NaverIE +from .nba import NBAIE +from .nbc import ( + CSNNEIE, + NBCIE, + NBCNewsIE, + NBCSportsIE, + NBCSportsVPlayerIE, + MSNBCIE, +) +from .ndr import ( + NDRIE, + NJoyIE, + NDREmbedBaseIE, + NDREmbedIE, + NJoyEmbedIE, +) +from .ndtv import NDTVIE +from .netzkino import NetzkinoIE +from .nerdcubed import NerdCubedFeedIE +from .neteasemusic import ( + NetEaseMusicIE, + NetEaseMusicAlbumIE, + NetEaseMusicSingerIE, + NetEaseMusicListIE, + NetEaseMusicMvIE, + NetEaseMusicProgramIE, + NetEaseMusicDjRadioIE, +) +from .newgrounds import NewgroundsIE +from .newstube import NewstubeIE +from .nextmedia import ( + NextMediaIE, + NextMediaActionNewsIE, + AppleDailyIE, +) +from .nextmovie import NextMovieIE +from .nfb import NFBIE +from .nfl import NFLIE +from .nhl import ( + NHLIE, + NHLNewsIE, + NHLVideocenterIE, +) +from .nick import NickIE +from .niconico import NiconicoIE, NiconicoPlaylistIE +from .ninegag import NineGagIE +from .noco import NocoIE +from .normalboots import NormalbootsIE +from .nosvideo import NosVideoIE +from .nova import NovaIE +from .novamov import ( + AuroraVidIE, + CloudTimeIE, + NowVideoIE, + VideoWeedIE, + WholeCloudIE, +) +from .nowness import ( + NownessIE, + NownessPlaylistIE, + NownessSeriesIE, +) +from .nowtv import ( + NowTVIE, + NowTVListIE, +) +from .noz import NozIE +from .npo import ( + NPOIE, + NPOLiveIE, + NPORadioIE, + NPORadioFragmentIE, + SchoolTVIE, + VPROIE, + WNLIE +) +from .npr import NprIE +from .nrk import ( + NRKIE, + NRKPlaylistIE, + NRKSkoleIE, + NRKTVIE, +) +from .ntvde import NTVDeIE +from .ntvru import NTVRuIE +from .nytimes import ( + NYTimesIE, + NYTimesArticleIE, +) +from .nuvid import NuvidIE +from .odnoklassniki import OdnoklassnikiIE +from .oktoberfesttv import OktoberfestTVIE +from .onionstudios import OnionStudiosIE +from .ooyala import ( + OoyalaIE, + OoyalaExternalIE, +) +from .openload import OpenloadIE +from .ora import OraTVIE +from .orf import ( + ORFTVthekIE, + ORFOE1IE, + ORFFM4IE, + ORFIPTVIE, +) +from .pandoratv import PandoraTVIE +from .parliamentliveuk import ParliamentLiveUKIE +from .patreon import PatreonIE +from .pbs import PBSIE +from .people import PeopleIE +from .periscope import PeriscopeIE +from .philharmoniedeparis import PhilharmonieDeParisIE +from .phoenix import PhoenixIE +from .photobucket import PhotobucketIE +from .pinkbike import PinkbikeIE +from .planetaplay import PlanetaPlayIE +from .pladform import PladformIE +from .played import PlayedIE +from .playfm import PlayFMIE +from .plays import PlaysTVIE +from .playtvak import PlaytvakIE +from .playvid import PlayvidIE +from .playwire import PlaywireIE +from .pluralsight import ( + PluralsightIE, + PluralsightCourseIE, +) +from .podomatic import PodomaticIE +from .porn91 import Porn91IE +from .pornhd import PornHdIE +from .pornhub import ( + PornHubIE, + PornHubPlaylistIE, + PornHubUserVideosIE, +) +from .pornotube import PornotubeIE +from .pornovoisines import PornoVoisinesIE +from .pornoxo import PornoXOIE +from .presstv import PressTVIE +from .primesharetv import PrimeShareTVIE +from .promptfile import PromptFileIE +from .prosiebensat1 import ProSiebenSat1IE +from .puls4 import Puls4IE +from .pyvideo import PyvideoIE +from .qqmusic import ( + QQMusicIE, + QQMusicSingerIE, + QQMusicAlbumIE, + QQMusicToplistIE, + QQMusicPlaylistIE, +) +from .r7 import R7IE +from .radiode import RadioDeIE +from .radiojavan import RadioJavanIE +from .radiobremen import RadioBremenIE +from .radiofrance import RadioFranceIE +from .rai import ( + RaiTVIE, + RaiIE, +) +from .rbmaradio import RBMARadioIE +from .rds import RDSIE +from .redtube import RedTubeIE +from .regiotv import RegioTVIE +from .restudy import RestudyIE +from .reverbnation import ReverbNationIE +from .revision3 import Revision3IE +from .rice import RICEIE +from .ringtv import RingTVIE +from .ro220 import Ro220IE +from .rottentomatoes import RottenTomatoesIE +from .roxwel import RoxwelIE +from .rtbf import RTBFIE +from .rte import RteIE, RteRadioIE +from .rtlnl import RtlNlIE +from .rtl2 import RTL2IE +from .rtp import RTPIE +from .rts import RTSIE +from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE +from .rtvnh import RTVNHIE +from .ruhd import RUHDIE +from .ruleporn import RulePornIE +from .rutube import ( + RutubeIE, + RutubeChannelIE, + RutubeEmbedIE, + RutubeMovieIE, + RutubePersonIE, +) +from .rutv import RUTVIE +from .ruutu import RuutuIE +from .sandia import SandiaIE +from .safari import ( + SafariIE, + SafariApiIE, + SafariCourseIE, +) +from .sapo import SapoIE +from .savefrom import SaveFromIE +from .sbs import SBSIE +from .scivee import SciVeeIE +from .screencast import ScreencastIE +from .screencastomatic import ScreencastOMaticIE +from .screenjunkies import ScreenJunkiesIE +from .screenwavemedia import ScreenwaveMediaIE, TeamFourIE +from .senateisvp import SenateISVPIE +from .servingsys import ServingSysIE +from .sexu import SexuIE +from .sexykarma import SexyKarmaIE +from .shahid import ShahidIE +from .shared import SharedIE +from .sharesix import ShareSixIE +from .sina import SinaIE +from .skynewsarabia import ( + SkyNewsArabiaIE, + SkyNewsArabiaArticleIE, +) +from .slideshare import SlideshareIE +from .slutload import SlutloadIE +from .smotri import ( + SmotriIE, + SmotriCommunityIE, + SmotriUserIE, + SmotriBroadcastIE, +) +from .snagfilms import ( + SnagFilmsIE, + SnagFilmsEmbedIE, +) +from .snotr import SnotrIE +from .sohu import SohuIE +from .soundcloud import ( + SoundcloudIE, + SoundcloudSetIE, + SoundcloudUserIE, + SoundcloudPlaylistIE, + SoundcloudSearchIE +) +from .soundgasm import ( + SoundgasmIE, + SoundgasmProfileIE +) +from .southpark import ( + SouthParkIE, + SouthParkDeIE, + SouthParkDkIE, + SouthParkEsIE, + SouthParkNlIE +) +from .spankbang import SpankBangIE +from .spankwire import SpankwireIE +from .spiegel import SpiegelIE, SpiegelArticleIE +from .spiegeltv import SpiegeltvIE +from .spike import SpikeIE +from .stitcher import StitcherIE +from .sport5 import Sport5IE +from .sportbox import ( + SportBoxIE, + SportBoxEmbedIE, +) +from .sportdeutschland import SportDeutschlandIE +from .srgssr import ( + SRGSSRIE, + SRGSSRPlayIE, +) +from .srmediathek import SRMediathekIE +from .ssa import SSAIE +from .stanfordoc import StanfordOpenClassroomIE +from .steam import SteamIE +from .streamcloud import StreamcloudIE +from .streamcz import StreamCZIE +from .streetvoice import StreetVoiceIE +from .sunporno import SunPornoIE +from .svt import ( + SVTIE, + SVTPlayIE, +) +from .swrmediathek import SWRMediathekIE +from .syfy import SyfyIE +from .sztvhu import SztvHuIE +from .tagesschau import TagesschauIE +from .tapely import TapelyIE +from .tass import TassIE +from .tdslifeway import TDSLifewayIE +from .teachertube import ( + TeacherTubeIE, + TeacherTubeUserIE, +) +from .teachingchannel import TeachingChannelIE +from .teamcoco import TeamcocoIE +from .techtalks import TechTalksIE +from .ted import TEDIE +from .tele13 import Tele13IE +from .telebruxelles import TeleBruxellesIE +from .telecinco import TelecincoIE +from .telegraaf import TelegraafIE +from .telemb import TeleMBIE +from .teletask import TeleTaskIE +from .testurl import TestURLIE +from .tf1 import TF1IE +from .theintercept import TheInterceptIE +from .theplatform import ( + ThePlatformIE, + ThePlatformFeedIE, +) +from .thescene import TheSceneIE +from .thesixtyone import TheSixtyOneIE +from .thestar import TheStarIE +from .thisamericanlife import ThisAmericanLifeIE +from .thisav import ThisAVIE +from .tinypic import TinyPicIE +from .tlc import TlcDeIE +from .tmz import ( + TMZIE, + TMZArticleIE, +) +from .tnaflix import ( + TNAFlixNetworkEmbedIE, + TNAFlixIE, + EMPFlixIE, + MovieFapIE, +) +from .toggle import ToggleIE +from .thvideo import ( + THVideoIE, + THVideoPlaylistIE +) +from .toutv import TouTvIE +from .toypics import ToypicsUserIE, ToypicsIE +from .traileraddict import TrailerAddictIE +from .trilulilu import TriluliluIE +from .trollvids import TrollvidsIE +from .trutube import TruTubeIE +from .tube8 import Tube8IE +from .tubitv import TubiTvIE +from .tudou import ( + TudouIE, + TudouPlaylistIE, + TudouAlbumIE, +) +from .tumblr import TumblrIE +from .tunein import ( + TuneInClipIE, + TuneInStationIE, + TuneInProgramIE, + TuneInTopicIE, + TuneInShortenerIE, +) +from .turbo import TurboIE +from .tutv import TutvIE +from .tv2 import ( + TV2IE, + TV2ArticleIE, +) +from .tv3 import TV3IE +from .tv4 import TV4IE +from .tvc import ( + TVCIE, + TVCArticleIE, +) +from .tvigle import TvigleIE +from .tvland import TVLandIE +from .tvp import TvpIE, TvpSeriesIE +from .tvplay import TVPlayIE +from .tweakers import TweakersIE +from .twentyfourvideo import TwentyFourVideoIE +from .twentymin import TwentyMinutenIE +from .twentytwotracks import ( + TwentyTwoTracksIE, + TwentyTwoTracksGenreIE +) +from .twitch import ( + TwitchVideoIE, + TwitchChapterIE, + TwitchVodIE, + TwitchProfileIE, + TwitchPastBroadcastsIE, + TwitchBookmarksIE, + TwitchStreamIE, +) +from .twitter import ( + TwitterCardIE, + TwitterIE, + TwitterAmplifyIE, +) +from .udemy import ( + UdemyIE, + UdemyCourseIE +) +from .udn import UDNEmbedIE +from .digiteka import DigitekaIE +from .unistra import UnistraIE +from .urort import UrortIE +from .usatoday import USATodayIE +from .ustream import UstreamIE, UstreamChannelIE +from .ustudio import UstudioIE +from .varzesh3 import Varzesh3IE +from .vbox7 import Vbox7IE +from .veehd import VeeHDIE +from .veoh import VeohIE +from .vessel import VesselIE +from .vesti import VestiIE +from .vevo import VevoIE +from .vgtv import ( + BTArticleIE, + BTVestlendingenIE, + VGTVIE, +) +from .vh1 import VH1IE +from .vice import ( + ViceIE, + ViceShowIE, +) +from .viddler import ViddlerIE +from .videodetective import VideoDetectiveIE +from .videofyme import VideofyMeIE +from .videomega import VideoMegaIE +from .videomore import ( + VideomoreIE, + VideomoreVideoIE, + VideomoreSeasonIE, +) +from .videopremium import VideoPremiumIE +from .videott import VideoTtIE +from .vidme import ( + VidmeIE, + VidmeUserIE, + VidmeUserLikesIE, +) +from .vidzi import VidziIE +from .vier import VierIE, VierVideosIE +from .viewster import ViewsterIE +from .viidea import ViideaIE +from .vimeo import ( + VimeoIE, + VimeoAlbumIE, + VimeoChannelIE, + VimeoGroupsIE, + VimeoLikesIE, + VimeoOndemandIE, + VimeoReviewIE, + VimeoUserIE, + VimeoWatchLaterIE, +) +from .vimple import VimpleIE +from .vine import ( + VineIE, + VineUserIE, +) +from .viki import ( + VikiIE, + VikiChannelIE, +) +from .vk import ( + VKIE, + VKUserVideosIE, +) +from .vlive import VLiveIE +from .vodlocker import VodlockerIE +from .voicerepublic import VoiceRepublicIE +from .voxmedia import VoxMediaIE +from .vporn import VpornIE +from .vrt import VRTIE +from .vube import VubeIE +from .vuclip import VuClipIE +from .vulture import VultureIE +from .walla import WallaIE +from .washingtonpost import WashingtonPostIE +from .wat import WatIE +from .wdr import ( + WDRIE, + WDRMobileIE, + WDRMausIE, +) +from .webofstories import ( + WebOfStoriesIE, + WebOfStoriesPlaylistIE, +) +from .weibo import WeiboIE +from .weiqitv import WeiqiTVIE +from .wimp import WimpIE +from .wistia import WistiaIE +from .worldstarhiphop import WorldStarHipHopIE +from .wrzuta import WrzutaIE +from .wsj import WSJIE +from .xbef import XBefIE +from .xboxclips import XboxClipsIE +from .xfileshare import XFileShareIE +from .xhamster import ( + XHamsterIE, + XHamsterEmbedIE, +) +from .xminus import XMinusIE +from .xnxx import XNXXIE +from .xstream import XstreamIE +from .xtube import XTubeUserIE, XTubeIE +from .xuite import XuiteIE +from .xvideos import XVideosIE +from .xxxymovies import XXXYMoviesIE +from .yahoo import ( + YahooIE, + YahooSearchIE, +) +from .yam import YamIE +from .yandexmusic import ( + YandexMusicTrackIE, + YandexMusicAlbumIE, + YandexMusicPlaylistIE, +) +from .yesjapan import YesJapanIE +from .yinyuetai import YinYueTaiIE +from .ynet import YnetIE +from .youjizz import YouJizzIE +from .youku import YoukuIE +from .youporn import YouPornIE +from .yourupload import YourUploadIE +from .youtube import ( + YoutubeIE, + YoutubeChannelIE, + YoutubeFavouritesIE, + YoutubeHistoryIE, + YoutubeLiveIE, + YoutubePlaylistIE, + YoutubePlaylistsIE, + YoutubeRecommendedIE, + YoutubeSearchDateIE, + YoutubeSearchIE, + YoutubeSearchURLIE, + YoutubeShowIE, + YoutubeSubscriptionsIE, + YoutubeTruncatedIDIE, + YoutubeTruncatedURLIE, + YoutubeUserIE, + YoutubeWatchLaterIE, +) +from .zapiks import ZapiksIE +from .zdf import ZDFIE, ZDFChannelIE +from .zingmp3 import ( + ZingMp3SongIE, + ZingMp3AlbumIE, +) +from .zippcast import ZippCastIE diff --git a/youtube_dl/extractor/firsttv.py b/youtube_dl/extractor/firsttv.py index 98b165143..88bca1007 100644 --- a/youtube_dl/extractor/firsttv.py +++ b/youtube_dl/extractor/firsttv.py @@ -2,78 +2,133 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import int_or_none +from ..compat import compat_xpath +from ..utils import ( + int_or_none, + qualities, + unified_strdate, + xpath_attr, + xpath_element, + xpath_text, + xpath_with_ns, +) class FirstTVIE(InfoExtractor): IE_NAME = '1tv' IE_DESC = 'Первый канал' - _VALID_URL = r'https?://(?:www\.)?1tv\.ru/(?:[^/]+/)+(?P.+)' + _VALID_URL = r'https?://(?:www\.)?1tv\.ru/(?:[^/]+/)+p?(?P\d+)' _TESTS = [{ - 'url': 'http://www.1tv.ru/videoarchive/73390', - 'md5': '777f525feeec4806130f4f764bc18a4f', + # single format via video_materials.json API + 'url': 'http://www.1tv.ru/prj/inprivate/vypusk/35930', + 'md5': '82a2777648acae812d58b3f5bd42882b', 'info_dict': { - 'id': '73390', + 'id': '35930', 'ext': 'mp4', - 'title': 'Олимпийские канатные дороги', - 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e', + 'title': 'Гость Людмила Сенчина. Наедине со всеми. Выпуск от 12.02.2015', + 'description': 'md5:357933adeede13b202c7c21f91b871b2', 'thumbnail': 're:^https?://.*\.(?:jpg|JPG)$', - 'duration': 149, - 'like_count': int, - 'dislike_count': int, + 'upload_date': '20150212', + 'duration': 2694, }, - 'skip': 'Only works from Russia', }, { - 'url': 'http://www.1tv.ru/prj/inprivate/vypusk/35930', - 'md5': 'a1b6b60d530ebcf8daacf4565762bbaf', + # multiple formats via video_materials.json API + 'url': 'http://www.1tv.ru/video_archive/projects/dobroeutro/p113641', 'info_dict': { - 'id': '35930', + 'id': '113641', 'ext': 'mp4', - 'title': 'Наедине со всеми. Людмила Сенчина', - 'description': 'md5:89553aed1d641416001fe8d450f06cb9', + 'title': 'Весенняя аллергия. Доброе утро. Фрагмент выпуска от 07.04.2016', + 'description': 'md5:8dcebb3dded0ff20fade39087fd1fee2', 'thumbnail': 're:^https?://.*\.(?:jpg|JPG)$', - 'duration': 2694, + 'upload_date': '20160407', + 'duration': 179, + 'formats': 'mincount:3', + }, + 'params': { + 'skip_download': True, }, - 'skip': 'Only works from Russia', + }, { + # single format only available via ONE_ONLINE_VIDEOS.archive_single_xml API + 'url': 'http://www.1tv.ru/video_archive/series/f7552/p47038', + 'md5': '519d306c5b5669761fd8906c39dbee23', + 'info_dict': { + 'id': '47038', + 'ext': 'mp4', + 'title': '"Побег". Второй сезон. 3 серия', + 'description': 'md5:3abf8f6b9bce88201c33e9a3d794a00b', + 'thumbnail': 're:^https?://.*\.(?:jpg|JPG)$', + 'upload_date': '20120516', + 'duration': 3080, + }, + }, { + 'url': 'http://www.1tv.ru/videoarchive/9967', + 'only_matching': True, }] def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id, 'Downloading page') + # Videos with multiple formats only available via this API + video = self._download_json( + 'http://www.1tv.ru/video_materials.json?legacy_id=%s' % video_id, + video_id, fatal=False) - video_url = self._html_search_regex( - r'''(?s)(?:jwplayer\('flashvideoportal_1'\)\.setup\({|var\s+playlistObj\s*=).*?'file'\s*:\s*'([^']+)'.*?}\);''', - webpage, 'video URL') + description, thumbnail, upload_date, duration = [None] * 4 - title = self._html_search_regex( - [r'
\s*

([^<]*)', - r"'title'\s*:\s*'([^']+)'"], webpage, 'title') - description = self._html_search_regex( - r'
\s*
 
\s*

([^<]*)

', - webpage, 'description', default=None) or self._html_search_meta( - 'description', webpage, 'description') + if video: + item = video[0] + title = item['title'] + quality = qualities(('ld', 'sd', 'hd', )) + formats = [{ + 'url': f['src'], + 'format_id': f.get('name'), + 'quality': quality(f.get('name')), + } for f in item['mbr'] if f.get('src')] + thumbnail = item.get('poster') + else: + # Some videos are not available via video_materials.json + video = self._download_xml( + 'http://www.1tv.ru/owa/win/ONE_ONLINE_VIDEOS.archive_single_xml?pid=%s' % video_id, + video_id) + + NS_MAP = { + 'media': 'http://search.yahoo.com/mrss/', + } - thumbnail = self._og_search_thumbnail(webpage) - duration = self._og_search_property( - 'video:duration', webpage, - 'video duration', fatal=False) + item = xpath_element(video, './channel/item', fatal=True) + title = xpath_text(item, './title', fatal=True) + formats = [{ + 'url': content.attrib['url'], + } for content in item.findall( + compat_xpath(xpath_with_ns('./media:content', NS_MAP))) if content.attrib.get('url')] + thumbnail = xpath_attr( + item, xpath_with_ns('./media:thumbnail', NS_MAP), 'url') - like_count = self._html_search_regex( - r'title="Понравилось".*?/> \[(\d+)\]', - webpage, 'like count', default=None) - dislike_count = self._html_search_regex( - r'title="Не понравилось".*?/> \[(\d+)\]', - webpage, 'dislike count', default=None) + self._sort_formats(formats) + + webpage = self._download_webpage(url, video_id, 'Downloading page', fatal=False) + if webpage: + title = self._html_search_regex( + (r'
\s*

([^<]*)', + r"'title'\s*:\s*'([^']+)'"), + webpage, 'title', default=None) or title + description = self._html_search_regex( + r'
\s*
 
\s*

([^<]*)

', + webpage, 'description', default=None) or self._html_search_meta( + 'description', webpage, 'description') + thumbnail = thumbnail or self._og_search_thumbnail(webpage) + duration = int_or_none(self._html_search_meta( + 'video:duration', webpage, 'video duration', fatal=False)) + upload_date = unified_strdate(self._html_search_meta( + 'ya:ovs:upload_date', webpage, 'upload date', fatal=False)) return { 'id': video_id, - 'url': video_url, 'thumbnail': thumbnail, 'title': title, 'description': description, + 'upload_date': upload_date, 'duration': int_or_none(duration), - 'like_count': int_or_none(like_count), - 'dislike_count': int_or_none(dislike_count), + 'formats': formats } diff --git a/youtube_dl/extractor/funnyordie.py b/youtube_dl/extractor/funnyordie.py index 4c4a87e2a..8c5ffc9e8 100644 --- a/youtube_dl/extractor/funnyordie.py +++ b/youtube_dl/extractor/funnyordie.py @@ -46,8 +46,8 @@ class FunnyOrDieIE(InfoExtractor): links.sort(key=lambda link: 1 if link[1] == 'mp4' else 0) m3u8_url = self._search_regex( - r']+src=(["\'])(?P.+?/master\.m3u8)\1', - webpage, 'm3u8 url', default=None, group='url') + r']+src=(["\'])(?P.+?/master\.m3u8[^"\']*)\1', + webpage, 'm3u8 url', group='url') formats = [] diff --git a/youtube_dl/extractor/gazeta.py b/youtube_dl/extractor/gazeta.py index ea32b621c..ba1c15414 100644 --- a/youtube_dl/extractor/gazeta.py +++ b/youtube_dl/extractor/gazeta.py @@ -7,7 +7,7 @@ from .common import InfoExtractor class GazetaIE(InfoExtractor): - _VALID_URL = r'(?Phttps?://(?:www\.)?gazeta\.ru/(?:[^/]+/)?video/(?:(?:main|\d{4}/\d{2}/\d{2})/)?(?P[A-Za-z0-9-_.]+)\.s?html)' + _VALID_URL = r'(?Phttps?://(?:www\.)?gazeta\.ru/(?:[^/]+/)?video/(?:main/)*(?:\d{4}/\d{2}/\d{2}/)?(?P[A-Za-z0-9-_.]+)\.s?html)' _TESTS = [{ 'url': 'http://www.gazeta.ru/video/main/zadaite_vopros_vladislavu_yurevichu.shtml', 'md5': 'd49c9bdc6e5a7888f27475dc215ee789', @@ -18,9 +18,22 @@ class GazetaIE(InfoExtractor): 'description': 'md5:38617526050bd17b234728e7f9620a71', 'thumbnail': 're:^https?://.*\.jpg', }, + 'skip': 'video not found', }, { 'url': 'http://www.gazeta.ru/lifestyle/video/2015/03/08/master-klass_krasivoi_byt._delaem_vesennii_makiyazh.shtml', 'only_matching': True, + }, { + 'url': 'http://www.gazeta.ru/video/main/main/2015/06/22/platit_ili_ne_platit_po_isku_yukosa.shtml', + 'info_dict': { + 'id': '252048', + 'ext': 'mp4', + 'title': '"Если по иску ЮКОСа придется платить, это будет большой удар по бюджету"', + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + 'add_ie': ['EaglePlatform'], }] def _real_extract(self, url): diff --git a/youtube_dl/extractor/gdcvault.py b/youtube_dl/extractor/gdcvault.py index 59ed4c38f..5d45faf85 100644 --- a/youtube_dl/extractor/gdcvault.py +++ b/youtube_dl/extractor/gdcvault.py @@ -4,7 +4,6 @@ import re from .common import InfoExtractor from ..utils import ( - remove_end, HEADRequest, sanitized_Request, urlencode_postdata, @@ -51,63 +50,33 @@ class GDCVaultIE(InfoExtractor): { 'url': 'http://gdcvault.com/play/1020791/', 'only_matching': True, - } + }, + { + # Hard-coded hostname + 'url': 'http://gdcvault.com/play/1023460/Tenacious-Design-and-The-Interface', + 'md5': 'a8efb6c31ed06ca8739294960b2dbabd', + 'info_dict': { + 'id': '1023460', + 'ext': 'mp4', + 'display_id': 'Tenacious-Design-and-The-Interface', + 'title': 'Tenacious Design and The Interface of \'Destiny\'', + }, + }, + { + # Multiple audios + 'url': 'http://www.gdcvault.com/play/1014631/Classic-Game-Postmortem-PAC', + 'info_dict': { + 'id': '1014631', + 'ext': 'flv', + 'title': 'How to Create a Good Game - From My Experience of Designing Pac-Man', + }, + 'params': { + 'skip_download': True, # Requires rtmpdump + 'format': 'jp', # The japanese audio + } + }, ] - def _parse_mp4(self, xml_description): - video_formats = [] - mp4_video = xml_description.find('./metadata/mp4video') - if mp4_video is None: - return None - - mobj = re.match(r'(?Phttps?://.*?/).*', mp4_video.text) - video_root = mobj.group('root') - formats = xml_description.findall('./metadata/MBRVideos/MBRVideo') - for format in formats: - mobj = re.match(r'mp4\:(?P.*)', format.find('streamName').text) - url = video_root + mobj.group('path') - vbr = format.find('bitrate').text - video_formats.append({ - 'url': url, - 'vbr': int(vbr), - }) - return video_formats - - def _parse_flv(self, xml_description): - formats = [] - akamai_url = xml_description.find('./metadata/akamaiHost').text - audios = xml_description.find('./metadata/audios') - if audios is not None: - for audio in audios: - formats.append({ - 'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url, - 'play_path': remove_end(audio.get('url'), '.flv'), - 'ext': 'flv', - 'vcodec': 'none', - 'format_id': audio.get('code'), - }) - slide_video_path = xml_description.find('./metadata/slideVideo').text - formats.append({ - 'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url, - 'play_path': remove_end(slide_video_path, '.flv'), - 'ext': 'flv', - 'format_note': 'slide deck video', - 'quality': -2, - 'preference': -2, - 'format_id': 'slides', - }) - speaker_video_path = xml_description.find('./metadata/speakerVideo').text - formats.append({ - 'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url, - 'play_path': remove_end(speaker_video_path, '.flv'), - 'ext': 'flv', - 'format_note': 'speaker video', - 'quality': -1, - 'preference': -1, - 'format_id': 'speaker', - }) - return formats - def _login(self, webpage_url, display_id): (username, password) = self._get_login_info() if username is None or password is None: @@ -159,9 +128,10 @@ class GDCVaultIE(InfoExtractor): 'title': title, } + PLAYER_REGEX = r'', - start_page, 'xml root', default=None) + PLAYER_REGEX, start_page, 'xml root', default=None) if xml_root is None: # Probably need to authenticate login_res = self._login(webpage_url, display_id) @@ -171,27 +141,21 @@ class GDCVaultIE(InfoExtractor): start_page = login_res # Grab the url from the authenticated page xml_root = self._html_search_regex( - r'', start_page, 'xml filename', default=None) if xml_name is None: # Fallback to the older format - xml_name = self._html_search_regex(r'', + start_page, 'xml filename') return { + '_type': 'url_transparent', 'id': video_id, 'display_id': display_id, - 'title': video_title, - 'formats': video_formats, + 'url': '%s/xml/%s' % (xml_root, xml_name), + 'ie': 'DigitalSpeaking', } diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 589d1e152..95d233259 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -60,6 +60,7 @@ from .googledrive import GoogleDriveIE from .jwplatform import JWPlatformIE from .digiteka import DigitekaIE from .instagram import InstagramIE +from .liveleak import LiveLeakIE class GenericIE(InfoExtractor): @@ -104,7 +105,8 @@ class GenericIE(InfoExtractor): 'skip_download': True, # infinite live stream }, 'expected_warnings': [ - r'501.*Not Implemented' + r'501.*Not Implemented', + r'400.*Bad Request', ], }, # Direct link with incorrect MIME type @@ -1128,6 +1130,30 @@ class GenericIE(InfoExtractor): 'skip_download': True, }, }, + # Another form of arte.tv embed + { + 'url': 'http://www.tv-replay.fr/redirection/09-04-16/arte-reportage-arte-11508975.html', + 'md5': '850bfe45417ddf221288c88a0cffe2e2', + 'info_dict': { + 'id': '030273-562_PLUS7-F', + 'ext': 'mp4', + 'title': 'ARTE Reportage - Nulle part, en France', + 'description': 'md5:e3a0e8868ed7303ed509b9e3af2b870d', + 'upload_date': '20160409', + }, + }, + # LiveLeak embed + { + 'url': 'http://www.wykop.pl/link/3088787/', + 'md5': 'ace83b9ed19b21f68e1b50e844fdf95d', + 'info_dict': { + 'id': '874_1459135191', + 'ext': 'mp4', + 'title': 'Man shows poor quality of new apartment building', + 'description': 'The wall is like a sand pile.', + 'uploader': 'Lake8737', + } + }, ] def report_following_redirect(self, new_url): @@ -1702,7 +1728,7 @@ class GenericIE(InfoExtractor): # Look for embedded arte.tv player mobj = re.search( - r'