Merge pull request #9110 from remitamine/parse_duration
authorSergey M <dstftw@gmail.com>
Thu, 21 Apr 2016 15:53:16 +0000 (22:53 +0700)
committerSergey M <dstftw@gmail.com>
Thu, 21 Apr 2016 15:53:16 +0000 (22:53 +0700)
[utils] improve parse_duration to handle more formats

97 files changed:
.github/ISSUE_TEMPLATE.md
.gitignore
AUTHORS
CONTRIBUTING.md
Makefile
README.md
devscripts/lazy_load_template.py [new file with mode: 0644]
devscripts/make_lazy_extractors.py [new file with mode: 0644]
docs/supportedsites.md
setup.cfg
setup.py
test/helper.py
test/test_InfoExtractor.py
test/test_utils.py
test/test_youtube_lists.py
youtube_dl/YoutubeDL.py
youtube_dl/downloader/external.py
youtube_dl/downloader/rtsp.py
youtube_dl/extractor/__init__.py
youtube_dl/extractor/acast.py
youtube_dl/extractor/aol.py
youtube_dl/extractor/ard.py
youtube_dl/extractor/arte.py
youtube_dl/extractor/audiomack.py
youtube_dl/extractor/bbc.py
youtube_dl/extractor/brightcove.py
youtube_dl/extractor/cbs.py
youtube_dl/extractor/cliprs.py [new file with mode: 0644]
youtube_dl/extractor/common.py
youtube_dl/extractor/democracynow.py
youtube_dl/extractor/dispeak.py [new file with mode: 0644]
youtube_dl/extractor/eagleplatform.py
youtube_dl/extractor/ebaumsworld.py
youtube_dl/extractor/extractors.py [new file with mode: 0644]
youtube_dl/extractor/firsttv.py
youtube_dl/extractor/funnyordie.py
youtube_dl/extractor/gazeta.py
youtube_dl/extractor/gdcvault.py
youtube_dl/extractor/generic.py
youtube_dl/extractor/glide.py
youtube_dl/extractor/goshgay.py
youtube_dl/extractor/gputechconf.py
youtube_dl/extractor/groupon.py
youtube_dl/extractor/howstuffworks.py
youtube_dl/extractor/huffpost.py
youtube_dl/extractor/instagram.py
youtube_dl/extractor/internetvideoarchive.py
youtube_dl/extractor/iqiyi.py
youtube_dl/extractor/izlesene.py
youtube_dl/extractor/jadorecettepub.py [deleted file]
youtube_dl/extractor/jwplatform.py
youtube_dl/extractor/karaoketv.py
youtube_dl/extractor/karrierevideos.py
youtube_dl/extractor/kuwo.py
youtube_dl/extractor/laola1tv.py
youtube_dl/extractor/lecture2go.py
youtube_dl/extractor/liveleak.py
youtube_dl/extractor/mdr.py
youtube_dl/extractor/metacritic.py
youtube_dl/extractor/mgtv.py [new file with mode: 0644]
youtube_dl/extractor/ministrygrid.py
youtube_dl/extractor/mixcloud.py
youtube_dl/extractor/mooshare.py [deleted file]
youtube_dl/extractor/musicplayon.py
youtube_dl/extractor/nerdist.py [deleted file]
youtube_dl/extractor/neteasemusic.py
youtube_dl/extractor/newgrounds.py
youtube_dl/extractor/novamov.py
youtube_dl/extractor/onionstudios.py
youtube_dl/extractor/people.py [new file with mode: 0644]
youtube_dl/extractor/presstv.py [new file with mode: 0644]
youtube_dl/extractor/puls4.py
youtube_dl/extractor/quickvid.py [deleted file]
youtube_dl/extractor/rottentomatoes.py
youtube_dl/extractor/screencastomatic.py
youtube_dl/extractor/sportbox.py
youtube_dl/extractor/streetvoice.py
youtube_dl/extractor/tdslifeway.py [new file with mode: 0644]
youtube_dl/extractor/telebruxelles.py
youtube_dl/extractor/theonion.py [deleted file]
youtube_dl/extractor/theplatform.py
youtube_dl/extractor/tnaflix.py
youtube_dl/extractor/tudou.py
youtube_dl/extractor/tvigle.py
youtube_dl/extractor/twitter.py
youtube_dl/extractor/ubu.py [deleted file]
youtube_dl/extractor/ustream.py
youtube_dl/extractor/varzesh3.py
youtube_dl/extractor/vice.py
youtube_dl/extractor/videodetective.py
youtube_dl/extractor/vimeo.py
youtube_dl/extractor/wayofthemaster.py [deleted file]
youtube_dl/extractor/xboxclips.py
youtube_dl/extractor/youtube.py
youtube_dl/postprocessor/ffmpeg.py
youtube_dl/utils.py
youtube_dl/version.py

index bf9494646c32e7a9040677cff352fa3318645535..35f8e686346d8ea25b442cfcd882b350726a54ef 100644 (file)
@@ -6,8 +6,8 @@
 
 ---
 
-### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.04.06*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
-- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.04.06**
+### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.04.19*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.04.19**
 
 ### Before submitting an *issue* make sure you have:
 - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
 [debug] User config: []
 [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
 [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
-[debug] youtube-dl version 2016.04.06
+[debug] youtube-dl version 2016.04.19
 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
 [debug] Proxy map: {}
index 26dbde73d412673ee9c53ee06a476a803a92edc7..72c10425d675f7c1952061be0057db0c2e5e232d 100644 (file)
@@ -13,6 +13,7 @@ README.txt
 youtube-dl.1
 youtube-dl.bash-completion
 youtube-dl.fish
+youtube_dl/extractor/lazy_extractors.py
 youtube-dl
 youtube-dl.exe
 youtube-dl.tar.gz
diff --git a/AUTHORS b/AUTHORS
index ea8d399785602b268cd1de4b78487b85ad1cd63a..07cade723be12afdbbf60485d9dbc2890d6c0f32 100644 (file)
--- a/AUTHORS
+++ b/AUTHORS
@@ -167,3 +167,4 @@ Kacper Michajłow
 José Joaquín Atria
 Viťas Strádal
 Kagami Hiiragi
+Philip Huppert
index 0df6193fb3cdc13f415d2dc6d71c3c82074bb259..c83b8655a595d9d040ef09c2c11c07d51f3f7d29 100644 (file)
@@ -140,14 +140,14 @@ After you have ensured this site is distributing it's content legally, you can f
                 # TODO more properties (see youtube_dl/extractor/common.py)
             }
     ```
-5. Add an import in [`youtube_dl/extractor/__init__.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/__init__.py).
+5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/extractors.py).
 6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc.
 7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/58525c94d547be1c8167d16c298bdd75506db328/youtube_dl/extractor/common.py#L68-L226). Add tests and code for as many as you want.
 8. Keep in mind that the only mandatory fields in info dict for successful extraction process are `id`, `title` and either `url` or `formats`, i.e. these are the critical data the extraction does not make any sense without. This means that [any field](https://github.com/rg3/youtube-dl/blob/58525c94d547be1c8167d16c298bdd75506db328/youtube_dl/extractor/common.py#L138-L226) apart from aforementioned mandatory ones should be treated **as optional** and extraction should be **tolerate** to situations when sources for these fields can potentially be unavailable (even if they always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields. For example, if you have some intermediate dict `meta` that is a source of metadata and it has a key `summary` that you want to extract and put into resulting info dict as `description`, you should be ready that this key may be missing from the `meta` dict, i.e. you should extract it as `meta.get('summary')` and not `meta['summary']`. Similarly, you should pass `fatal=False` when extracting data from a webpage with `_search_regex/_html_search_regex`.
 9. Check the code with [flake8](https://pypi.python.org/pypi/flake8).
 10. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this:
 
-        $ git add youtube_dl/extractor/__init__.py
+        $ git add youtube_dl/extractor/extractors.py
         $ git add youtube_dl/extractor/yourextractor.py
         $ git commit -m '[yourextractor] Add new extractor'
         $ git push origin yourextractor
index ba7f7ed3663ddd3c7dccf18207f5292b539bdaf1..06cffcb710c6fd8fa6962007bd07d4753d5d5af6 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites
 
 clean:
-       rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part *.info.json *.mp4 *.flv *.mp3 *.avi CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe
+       rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish youtube_dl/extractor/lazy_extractors.py *.dump *.part *.info.json *.mp4 *.flv *.mp3 *.avi CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe
        find . -name "*.pyc" -delete
        find . -name "*.class" -delete
 
@@ -88,6 +88,12 @@ youtube-dl.fish: youtube_dl/*.py youtube_dl/*/*.py devscripts/fish-completion.in
 
 fish-completion: youtube-dl.fish
 
+lazy-extractors: youtube_dl/extractor/lazy_extractors.py
+
+_EXTRACTOR_FILES != find youtube_dl/extractor -iname '*.py' -and -not -iname 'lazy_extractors.py'
+youtube_dl/extractor/lazy_extractors.py: devscripts/make_lazy_extractors.py devscripts/lazy_load_template.py $(_EXTRACTOR_FILES)
+       $(PYTHON) devscripts/make_lazy_extractors.py $@
+
 youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish
        @tar -czf youtube-dl.tar.gz --transform "s|^|youtube-dl/|" --owner 0 --group 0 \
                --exclude '*.DS_Store' \
index e972bf69f8aedc6ec235f7f50a83e8ac5b951994..cd18edd87877239f622b2acdfa4632f7291656f3 100644 (file)
--- a/README.md
+++ b/README.md
@@ -889,14 +889,14 @@ After you have ensured this site is distributing it's content legally, you can f
                 # TODO more properties (see youtube_dl/extractor/common.py)
             }
     ```
-5. Add an import in [`youtube_dl/extractor/__init__.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/__init__.py).
+5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/extractors.py).
 6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc.
 7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/58525c94d547be1c8167d16c298bdd75506db328/youtube_dl/extractor/common.py#L68-L226). Add tests and code for as many as you want.
 8. Keep in mind that the only mandatory fields in info dict for successful extraction process are `id`, `title` and either `url` or `formats`, i.e. these are the critical data the extraction does not make any sense without. This means that [any field](https://github.com/rg3/youtube-dl/blob/58525c94d547be1c8167d16c298bdd75506db328/youtube_dl/extractor/common.py#L138-L226) apart from aforementioned mandatory ones should be treated **as optional** and extraction should be **tolerate** to situations when sources for these fields can potentially be unavailable (even if they always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields. For example, if you have some intermediate dict `meta` that is a source of metadata and it has a key `summary` that you want to extract and put into resulting info dict as `description`, you should be ready that this key may be missing from the `meta` dict, i.e. you should extract it as `meta.get('summary')` and not `meta['summary']`. Similarly, you should pass `fatal=False` when extracting data from a webpage with `_search_regex/_html_search_regex`.
 9. Check the code with [flake8](https://pypi.python.org/pypi/flake8).
 10. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this:
 
-        $ git add youtube_dl/extractor/__init__.py
+        $ git add youtube_dl/extractor/extractors.py
         $ git add youtube_dl/extractor/yourextractor.py
         $ git commit -m '[yourextractor] Add new extractor'
         $ git push origin yourextractor
diff --git a/devscripts/lazy_load_template.py b/devscripts/lazy_load_template.py
new file mode 100644 (file)
index 0000000..2e6e664
--- /dev/null
@@ -0,0 +1,19 @@
+# encoding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+
+class LazyLoadExtractor(object):
+    _module = None
+
+    @classmethod
+    def ie_key(cls):
+        return cls.__name__[:-2]
+
+    def __new__(cls, *args, **kwargs):
+        mod = __import__(cls._module, fromlist=(cls.__name__,))
+        real_cls = getattr(mod, cls.__name__)
+        instance = real_cls.__new__(real_cls)
+        instance.__init__(*args, **kwargs)
+        return instance
diff --git a/devscripts/make_lazy_extractors.py b/devscripts/make_lazy_extractors.py
new file mode 100644 (file)
index 0000000..b5a8b91
--- /dev/null
@@ -0,0 +1,63 @@
+from __future__ import unicode_literals, print_function
+
+from inspect import getsource
+import os
+from os.path import dirname as dirn
+import sys
+
+print('WARNING: Lazy loading extractors is an experimental feature that may not always work', file=sys.stderr)
+
+sys.path.insert(0, dirn(dirn((os.path.abspath(__file__)))))
+
+lazy_extractors_filename = sys.argv[1]
+if os.path.exists(lazy_extractors_filename):
+    os.remove(lazy_extractors_filename)
+
+from youtube_dl.extractor import _ALL_CLASSES
+from youtube_dl.extractor.common import InfoExtractor
+
+with open('devscripts/lazy_load_template.py', 'rt') as f:
+    module_template = f.read()
+
+module_contents = [module_template + '\n' + getsource(InfoExtractor.suitable)]
+
+ie_template = '''
+class {name}(LazyLoadExtractor):
+    _VALID_URL = {valid_url!r}
+    _module = '{module}'
+'''
+
+make_valid_template = '''
+    @classmethod
+    def _make_valid_url(cls):
+        return {valid_url!r}
+'''
+
+
+def build_lazy_ie(ie, name):
+    valid_url = getattr(ie, '_VALID_URL', None)
+    s = ie_template.format(
+        name=name,
+        valid_url=valid_url,
+        module=ie.__module__)
+    if ie.suitable.__func__ is not InfoExtractor.suitable.__func__:
+        s += '\n' + getsource(ie.suitable)
+    if hasattr(ie, '_make_valid_url'):
+        # search extractors
+        s += make_valid_template.format(valid_url=ie._make_valid_url())
+    return s
+
+names = []
+for ie in list(sorted(_ALL_CLASSES[:-1], key=lambda cls: cls.ie_key())) + _ALL_CLASSES[-1:]:
+    name = ie.ie_key() + 'IE'
+    src = build_lazy_ie(ie, name)
+    module_contents.append(src)
+    names.append(name)
+
+module_contents.append(
+    '_ALL_CLASSES = [{0}]'.format(', '.join(names)))
+
+module_src = '\n'.join(module_contents) + '\n'
+
+with open(lazy_extractors_filename, 'wt') as f:
+    f.write(module_src)
index d6ee8476b8b24f216c22b09016b68d2f1bc24160..e12a7d1824804ce370ea18daead78e17531a417b 100644 (file)
@@ -50,6 +50,7 @@
  - **arte.tv:ddc**
  - **arte.tv:embed**
  - **arte.tv:future**
+ - **arte.tv:info**
  - **arte.tv:magazine**
  - **AtresPlayer**
  - **ATTTechChannel**
  - **Cinemassacre**
  - **Clipfish**
  - **cliphunter**
+ - **ClipRs**
  - **Clipsyndicate**
  - **cloudtime**: CloudTime
  - **Cloudy**
  - **ivi:compilation**: ivi.ru compilations
  - **ivideon**: Ivideon TV
  - **Izlesene**
- - **JadoreCettePub**
  - **JeuxVideo**
  - **Jove**
  - **jpopsuki.tv**
  - **miomio.tv**
  - **MiTele**: mitele.es
  - **mixcloud**
+ - **mixcloud:playlist**
+ - **mixcloud:stream**
+ - **mixcloud:user**
  - **MLB**
  - **Mnet**
  - **MoeVideo**: LetitBit video services: moevideo.net, playreplay.net and videochart.net
  - **Mofosex**
  - **Mojvideo**
  - **Moniker**: allmyvideos.net and vidspot.net
- - **mooshare**: Mooshare.biz
  - **Morningstar**: morningstar.com
  - **Motherless**
  - **Motorsport**: motorsport.com
  - **ndr:embed:base**
  - **NDTV**
  - **NerdCubedFeed**
- - **Nerdist**
  - **netease:album**: 网易云音乐 - 专辑
  - **netease:djradio**: 网易云音乐 - 电台
  - **netease:mv**: 网易云音乐 - MV
  - **Pornotube**
  - **PornoVoisines**
  - **PornoXO**
+ - **PressTV**
  - **PrimeShareTV**
  - **PromptFile**
  - **prosiebensat1**: ProSiebenSat.1 Digital
  - **Tagesschau**
  - **Tapely**
  - **Tass**
+ - **TDSLifeway**
  - **teachertube**: teachertube.com videos
  - **teachertube:user:collection**: teachertube.com user and collection videos
  - **TeachingChannel**
  - **TeleTask**
  - **TF1**
  - **TheIntercept**
- - **TheOnion**
  - **ThePlatform**
  - **ThePlatformFeed**
  - **TheScene**
  - **twitter**
  - **twitter:amplify**
  - **twitter:card**
- - **Ubu**
  - **udemy**
  - **udemy:course**
  - **UDNEmbed**: 聯合影音
  - **Walla**
  - **WashingtonPost**
  - **wat.tv**
- - **WayOfTheMaster**
  - **WDR**
  - **wdr:mobile**
  - **WDRMaus**: Sendung mit der Maus
index 5760112d4564bb4fe8389b9c134ef6ef406a81b3..2dc06ffe413f76f4d776fe44780f327a170d7801 100644 (file)
--- a/setup.cfg
+++ b/setup.cfg
@@ -2,5 +2,5 @@
 universal = True
 
 [flake8]
-exclude = youtube_dl/extractor/__init__.py,devscripts/buildserver.py,devscripts/make_issue_template.py,setup.py,build,.git
+exclude = youtube_dl/extractor/__init__.py,devscripts/buildserver.py,devscripts/lazy_load_template.py,devscripts/make_issue_template.py,setup.py,build,.git
 ignore = E402,E501,E731
index bfe931f5b42a506ca3cceb1a5ec4acdb6a6a4813..9444d403d542a0d3066d9d633532e4daf11726e9 100644 (file)
--- a/setup.py
+++ b/setup.py
@@ -8,11 +8,12 @@ import warnings
 import sys
 
 try:
-    from setuptools import setup
+    from setuptools import setup, Command
     setuptools_available = True
 except ImportError:
-    from distutils.core import setup
+    from distutils.core import setup, Command
     setuptools_available = False
+from distutils.spawn import spawn
 
 try:
     # This will create an exe that needs Microsoft Visual C++ 2008
@@ -70,6 +71,22 @@ else:
     else:
         params['scripts'] = ['bin/youtube-dl']
 
+class build_lazy_extractors(Command):
+    description = "Build the extractor lazy loading module"
+    user_options = []
+
+    def initialize_options(self):
+        pass
+
+    def finalize_options(self):
+        pass
+
+    def run(self):
+        spawn(
+            [sys.executable, 'devscripts/make_lazy_extractors.py', 'youtube_dl/extractor/lazy_extractors.py'],
+            dry_run=self.dry_run,
+        )
+
 # Get the version from youtube_dl/version.py without importing the package
 exec(compile(open('youtube_dl/version.py').read(),
              'youtube_dl/version.py', 'exec'))
@@ -107,5 +124,6 @@ setup(
         "Programming Language :: Python :: 3.4",
     ],
 
+    cmdclass={'build_lazy_extractors': build_lazy_extractors},
     **params
 )
index f2d87821290095c1f9526f50db5d80ab31969d56..b8e22c5cb42f2e14465e812ed624aaa5e102ff5c 100644 (file)
@@ -143,6 +143,9 @@ def expect_value(self, got, expected, field):
             expect_value(self, item_got, item_expected, field)
     else:
         if isinstance(expected, compat_str) and expected.startswith('md5:'):
+            self.assertTrue(
+                isinstance(got, compat_str),
+                'Expected field %s to be a unicode object, but got value %r of type %r' % (field, got, type(got)))
             got = 'md5:' + md5(got)
         elif isinstance(expected, compat_str) and expected.startswith('mincount:'):
             self.assertTrue(
index 938466a800122211ab0414d9aa9de831951e2903..6404ac89f55df282e9525f6ae1a8e62f7344dd40 100644 (file)
@@ -11,6 +11,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 from test.helper import FakeYDL
 from youtube_dl.extractor.common import InfoExtractor
 from youtube_dl.extractor import YoutubeIE, get_info_extractor
+from youtube_dl.utils import encode_data_uri, strip_jsonp, ExtractorError
 
 
 class TestIE(InfoExtractor):
@@ -66,5 +67,14 @@ class TestInfoExtractor(unittest.TestCase):
         self.assertEqual(ie._html_search_meta('e', html), '5')
         self.assertEqual(ie._html_search_meta('f', html), '6')
 
+    def test_download_json(self):
+        uri = encode_data_uri(b'{"foo": "blah"}', 'application/json')
+        self.assertEqual(self.ie._download_json(uri, None), {'foo': 'blah'})
+        uri = encode_data_uri(b'callback({"foo": "blah"})', 'application/javascript')
+        self.assertEqual(self.ie._download_json(uri, None, transform_source=strip_jsonp), {'foo': 'blah'})
+        uri = encode_data_uri(b'{"foo": invalid}', 'application/json')
+        self.assertRaises(ExtractorError, self.ie._download_json, uri, None)
+        self.assertEqual(self.ie._download_json(uri, None, fatal=False), None)
+
 if __name__ == '__main__':
     unittest.main()
index e0323a5c6a5ceabbe7e52e39a94e5b9d8efc81a5..e16a6761b7e9a70589c6da7b48c9f54e2c03e734 100644 (file)
@@ -20,6 +20,7 @@ from youtube_dl.utils import (
     args_to_str,
     encode_base_n,
     clean_html,
+    date_from_str,
     DateRange,
     detect_exe_version,
     determine_ext,
@@ -234,6 +235,13 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(unescapeHTML('&eacute;'), 'é')
         self.assertEqual(unescapeHTML('&#2013266066;'), '&#2013266066;')
 
+    def test_date_from_str(self):
+        self.assertEqual(date_from_str('yesterday'), date_from_str('now-1day'))
+        self.assertEqual(date_from_str('now+7day'), date_from_str('now+1week'))
+        self.assertEqual(date_from_str('now+14day'), date_from_str('now+2week'))
+        self.assertEqual(date_from_str('now+365day'), date_from_str('now+1year'))
+        self.assertEqual(date_from_str('now+30day'), date_from_str('now+1month'))
+
     def test_daterange(self):
         _20century = DateRange("19000101", "20000101")
         self.assertFalse("17890714" in _20century)
index 47df0f348d862e4ab455058d00321636123db807..af1c454217d0bec66a27a1bdc89c02195bb6274f 100644 (file)
@@ -44,7 +44,7 @@ class TestYoutubeLists(unittest.TestCase):
         ie = YoutubePlaylistIE(dl)
         result = ie.extract('https://www.youtube.com/watch?v=W01L70IGBgE&index=2&list=RDOQpdSVF_k_w')
         entries = result['entries']
-        self.assertTrue(len(entries) >= 20)
+        self.assertTrue(len(entries) >= 50)
         original_video = entries[0]
         self.assertEqual(original_video['id'], 'OQpdSVF_k_w')
 
index d7aa951ff39fc54238c5759ee5a57f2c70d9de0d..a89a71a250e3c02cb1157bfc0970e308474e4e89 100755 (executable)
@@ -82,7 +82,7 @@ from .utils import (
     YoutubeDLHandler,
 )
 from .cache import Cache
-from .extractor import get_info_extractor, gen_extractors
+from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER
 from .downloader import get_suitable_downloader
 from .downloader.rtmp import rtmpdump_version
 from .postprocessor import (
@@ -378,8 +378,9 @@ class YoutubeDL(object):
     def add_info_extractor(self, ie):
         """Add an InfoExtractor object to the end of the list."""
         self._ies.append(ie)
-        self._ies_instances[ie.ie_key()] = ie
-        ie.set_downloader(self)
+        if not isinstance(ie, type):
+            self._ies_instances[ie.ie_key()] = ie
+            ie.set_downloader(self)
 
     def get_info_extractor(self, ie_key):
         """
@@ -397,7 +398,7 @@ class YoutubeDL(object):
         """
         Add the InfoExtractors returned by gen_extractors to the end of the list
         """
-        for ie in gen_extractors():
+        for ie in gen_extractor_classes():
             self.add_info_extractor(ie)
 
     def add_post_processor(self, pp):
@@ -661,6 +662,7 @@ class YoutubeDL(object):
             if not ie.suitable(url):
                 continue
 
+            ie = self.get_info_extractor(ie.ie_key())
             if not ie.working():
                 self.report_warning('The program functionality for this site has been marked as broken, '
                                     'and will probably not work.')
@@ -1240,7 +1242,10 @@ class YoutubeDL(object):
             self.list_thumbnails(info_dict)
             return
 
-        if thumbnails and 'thumbnail' not in info_dict:
+        thumbnail = info_dict.get('thumbnail')
+        if thumbnail:
+            info_dict['thumbnail'] = sanitize_url(thumbnail)
+        elif thumbnails:
             info_dict['thumbnail'] = thumbnails[-1]['url']
 
         if 'display_id' not in info_dict and 'id' in info_dict:
@@ -1954,6 +1959,8 @@ class YoutubeDL(object):
         write_string(encoding_str, encoding=None)
 
         self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
+        if _LAZY_LOADER:
+            self._write_string('[debug] Lazy loading extractors enabled' + '\n')
         try:
             sp = subprocess.Popen(
                 ['git', 'rev-parse', '--short', 'HEAD'],
index 30277dc205787d226360cfc950e5c08796a5fc03..8d642fc3e60594f10a057847cf5702f715941326 100644 (file)
@@ -225,7 +225,7 @@ class FFmpegFD(ExternalFD):
 
         args += ['-i', url, '-c', 'copy']
         if protocol == 'm3u8':
-            if self.params.get('hls_use_mpegts', False):
+            if self.params.get('hls_use_mpegts', False) or tmpfilename == '-':
                 args += ['-f', 'mpegts']
             else:
                 args += ['-f', 'mp4', '-bsf:a', 'aac_adtstoasc']
index 3eb29526cbc90cb3351c75876698a1b238c07ef8..939358b2a2f00edaca5283d311d89ab220d26966 100644 (file)
@@ -27,6 +27,8 @@ class RtspFD(FileDownloader):
             self.report_error('MMS or RTSP download detected but neither "mplayer" nor "mpv" could be run. Please install any.')
             return False
 
+        self._debug_cmd(args)
+
         retval = subprocess.call(args)
         if retval == 0:
             fsize = os.path.getsize(encodeFilename(tmpfilename))
index c3121d83c5875ce1e9fe130f8eb8f23cc7935efe..18d8dbcd6672f82776a9bd9f6f4cc63cac91129d 100644 (file)
 from __future__ import unicode_literals
 
-from .abc import ABCIE
-from .abc7news import Abc7NewsIE
-from .academicearth import AcademicEarthCourseIE
-from .acast import (
-    ACastIE,
-    ACastChannelIE,
-)
-from .addanime import AddAnimeIE
-from .adobetv import (
-    AdobeTVIE,
-    AdobeTVShowIE,
-    AdobeTVChannelIE,
-    AdobeTVVideoIE,
-)
-from .adultswim import AdultSwimIE
-from .aenetworks import AENetworksIE
-from .aftonbladet import AftonbladetIE
-from .airmozilla import AirMozillaIE
-from .aljazeera import AlJazeeraIE
-from .alphaporno import AlphaPornoIE
-from .animeondemand import AnimeOnDemandIE
-from .anitube import AnitubeIE
-from .anysex import AnySexIE
-from .aol import (
-    AolIE,
-    AolFeaturesIE,
-)
-from .allocine import AllocineIE
-from .aparat import AparatIE
-from .appleconnect import AppleConnectIE
-from .appletrailers import (
-    AppleTrailersIE,
-    AppleTrailersSectionIE,
-)
-from .archiveorg import ArchiveOrgIE
-from .ard import (
-    ARDIE,
-    ARDMediathekIE,
-    SportschauIE,
-)
-from .arte import (
-    ArteTvIE,
-    ArteTVPlus7IE,
-    ArteTVCreativeIE,
-    ArteTVConcertIE,
-    ArteTVFutureIE,
-    ArteTVCinemaIE,
-    ArteTVDDCIE,
-    ArteTVMagazineIE,
-    ArteTVEmbedIE,
-)
-from .atresplayer import AtresPlayerIE
-from .atttechchannel import ATTTechChannelIE
-from .audimedia import AudiMediaIE
-from .audioboom import AudioBoomIE
-from .audiomack import AudiomackIE, AudiomackAlbumIE
-from .azubu import AzubuIE, AzubuLiveIE
-from .baidu import BaiduVideoIE
-from .bambuser import BambuserIE, BambuserChannelIE
-from .bandcamp import BandcampIE, BandcampAlbumIE
-from .bbc import (
-    BBCCoUkIE,
-    BBCCoUkArticleIE,
-    BBCIE,
-)
-from .beeg import BeegIE
-from .behindkink import BehindKinkIE
-from .beatportpro import BeatportProIE
-from .bet import BetIE
-from .bigflix import BigflixIE
-from .bild import BildIE
-from .bilibili import BiliBiliIE
-from .biobiochiletv import BioBioChileTVIE
-from .bleacherreport import (
-    BleacherReportIE,
-    BleacherReportCMSIE,
-)
-from .blinkx import BlinkxIE
-from .bloomberg import BloombergIE
-from .bokecc import BokeCCIE
-from .bpb import BpbIE
-from .br import BRIE
-from .bravotv import BravoTVIE
-from .breakcom import BreakIE
-from .brightcove import (
-    BrightcoveLegacyIE,
-    BrightcoveNewIE,
-)
-from .buzzfeed import BuzzFeedIE
-from .byutv import BYUtvIE
-from .c56 import C56IE
-from .camdemy import (
-    CamdemyIE,
-    CamdemyFolderIE
-)
-from .camwithher import CamWithHerIE
-from .canalplus import CanalplusIE
-from .canalc2 import Canalc2IE
-from .canvas import CanvasIE
-from .cbc import (
-    CBCIE,
-    CBCPlayerIE,
-)
-from .cbs import CBSIE
-from .cbsinteractive import CBSInteractiveIE
-from .cbsnews import (
-    CBSNewsIE,
-    CBSNewsLiveVideoIE,
-)
-from .cbssports import CBSSportsIE
-from .ccc import CCCIE
-from .cda import CDAIE
-from .ceskatelevize import CeskaTelevizeIE
-from .channel9 import Channel9IE
-from .chaturbate import ChaturbateIE
-from .chilloutzone import ChilloutzoneIE
-from .chirbit import (
-    ChirbitIE,
-    ChirbitProfileIE,
-)
-from .cinchcast import CinchcastIE
-from .cinemassacre import CinemassacreIE
-from .clipfish import ClipfishIE
-from .cliphunter import CliphunterIE
-from .clipsyndicate import ClipsyndicateIE
-from .cloudy import CloudyIE
-from .clubic import ClubicIE
-from .clyp import ClypIE
-from .cmt import CMTIE
-from .cnbc import CNBCIE
-from .cnn import (
-    CNNIE,
-    CNNBlogsIE,
-    CNNArticleIE,
-)
-from .collegehumor import CollegeHumorIE
-from .collegerama import CollegeRamaIE
-from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE
-from .comcarcoff import ComCarCoffIE
-from .commonmistakes import CommonMistakesIE, UnicodeBOMIE
-from .commonprotocols import RtmpIE
-from .condenast import CondeNastIE
-from .cracked import CrackedIE
-from .crackle import CrackleIE
-from .criterion import CriterionIE
-from .crooksandliars import CrooksAndLiarsIE
-from .crunchyroll import (
-    CrunchyrollIE,
-    CrunchyrollShowPlaylistIE
-)
-from .cspan import CSpanIE
-from .ctsnews import CtsNewsIE
-from .cultureunplugged import CultureUnpluggedIE
-from .cwtv import CWTVIE
-from .dailymotion import (
-    DailymotionIE,
-    DailymotionPlaylistIE,
-    DailymotionUserIE,
-    DailymotionCloudIE,
-)
-from .daum import (
-    DaumIE,
-    DaumClipIE,
-    DaumPlaylistIE,
-    DaumUserIE,
-)
-from .dbtv import DBTVIE
-from .dcn import (
-    DCNIE,
-    DCNVideoIE,
-    DCNLiveIE,
-    DCNSeasonIE,
-)
-from .dctp import DctpTvIE
-from .deezer import DeezerPlaylistIE
-from .democracynow import DemocracynowIE
-from .dfb import DFBIE
-from .dhm import DHMIE
-from .dotsub import DotsubIE
-from .douyutv import DouyuTVIE
-from .dplay import DPlayIE
-from .dramafever import (
-    DramaFeverIE,
-    DramaFeverSeriesIE,
-)
-from .dreisat import DreiSatIE
-from .drbonanza import DRBonanzaIE
-from .drtuber import DrTuberIE
-from .drtv import DRTVIE
-from .dvtv import DVTVIE
-from .dump import DumpIE
-from .dumpert import DumpertIE
-from .defense import DefenseGouvFrIE
-from .discovery import DiscoveryIE
-from .dropbox import DropboxIE
-from .dw import (
-    DWIE,
-    DWArticleIE,
-)
-from .eagleplatform import EaglePlatformIE
-from .ebaumsworld import EbaumsWorldIE
-from .echomsk import EchoMskIE
-from .ehow import EHowIE
-from .eighttracks import EightTracksIE
-from .einthusan import EinthusanIE
-from .eitb import EitbIE
-from .ellentv import (
-    EllenTVIE,
-    EllenTVClipsIE,
-)
-from .elpais import ElPaisIE
-from .embedly import EmbedlyIE
-from .engadget import EngadgetIE
-from .eporner import EpornerIE
-from .eroprofile import EroProfileIE
-from .escapist import EscapistIE
-from .espn import ESPNIE
-from .esri import EsriVideoIE
-from .europa import EuropaIE
-from .everyonesmixtape import EveryonesMixtapeIE
-from .exfm import ExfmIE
-from .expotv import ExpoTVIE
-from .extremetube import ExtremeTubeIE
-from .facebook import FacebookIE
-from .faz import FazIE
-from .fc2 import FC2IE
-from .fczenit import FczenitIE
-from .firstpost import FirstpostIE
-from .firsttv import FirstTVIE
-from .fivemin import FiveMinIE
-from .fivetv import FiveTVIE
-from .fktv import FKTVIE
-from .flickr import FlickrIE
-from .folketinget import FolketingetIE
-from .footyroom import FootyRoomIE
-from .fourtube import FourTubeIE
-from .fox import FOXIE
-from .foxgay import FoxgayIE
-from .foxnews import FoxNewsIE
-from .foxsports import FoxSportsIE
-from .franceculture import (
-    FranceCultureIE,
-    FranceCultureEmissionIE,
-)
-from .franceinter import FranceInterIE
-from .francetv import (
-    PluzzIE,
-    FranceTvInfoIE,
-    FranceTVIE,
-    GenerationQuoiIE,
-    CultureboxIE,
-)
-from .freesound import FreesoundIE
-from .freespeech import FreespeechIE
-from .freevideo import FreeVideoIE
-from .funimation import FunimationIE
-from .funnyordie import FunnyOrDieIE
-from .gameinformer import GameInformerIE
-from .gamekings import GamekingsIE
-from .gameone import (
-    GameOneIE,
-    GameOnePlaylistIE,
-)
-from .gamersyde import GamersydeIE
-from .gamespot import GameSpotIE
-from .gamestar import GameStarIE
-from .gametrailers import GametrailersIE
-from .gazeta import GazetaIE
-from .gdcvault import GDCVaultIE
-from .generic import GenericIE
-from .gfycat import GfycatIE
-from .giantbomb import GiantBombIE
-from .giga import GigaIE
-from .glide import GlideIE
-from .globo import (
-    GloboIE,
-    GloboArticleIE,
-)
-from .godtube import GodTubeIE
-from .goldenmoustache import GoldenMoustacheIE
-from .golem import GolemIE
-from .googledrive import GoogleDriveIE
-from .googleplus import GooglePlusIE
-from .googlesearch import GoogleSearchIE
-from .goshgay import GoshgayIE
-from .gputechconf import GPUTechConfIE
-from .groupon import GrouponIE
-from .hark import HarkIE
-from .hbo import HBOIE
-from .hearthisat import HearThisAtIE
-from .heise import HeiseIE
-from .hellporno import HellPornoIE
-from .helsinki import HelsinkiIE
-from .hentaistigma import HentaiStigmaIE
-from .historicfilms import HistoricFilmsIE
-from .hitbox import HitboxIE, HitboxLiveIE
-from .hornbunny import HornBunnyIE
-from .hotnewhiphop import HotNewHipHopIE
-from .hotstar import HotStarIE
-from .howcast import HowcastIE
-from .howstuffworks import HowStuffWorksIE
-from .huffpost import HuffPostIE
-from .hypem import HypemIE
-from .iconosquare import IconosquareIE
-from .ign import (
-    IGNIE,
-    OneUPIE,
-    PCMagIE,
-)
-from .imdb import (
-    ImdbIE,
-    ImdbListIE
-)
-from .imgur import (
-    ImgurIE,
-    ImgurAlbumIE,
-)
-from .ina import InaIE
-from .indavideo import (
-    IndavideoIE,
-    IndavideoEmbedIE,
-)
-from .infoq import InfoQIE
-from .instagram import InstagramIE, InstagramUserIE
-from .internetvideoarchive import InternetVideoArchiveIE
-from .iprima import IPrimaIE
-from .iqiyi import IqiyiIE
-from .ir90tv import Ir90TvIE
-from .ivi import (
-    IviIE,
-    IviCompilationIE
-)
-from .ivideon import IvideonIE
-from .izlesene import IzleseneIE
-from .jadorecettepub import JadoreCettePubIE
-from .jeuxvideo import JeuxVideoIE
-from .jove import JoveIE
-from .jwplatform import JWPlatformIE
-from .jpopsukitv import JpopsukiIE
-from .kaltura import KalturaIE
-from .kanalplay import KanalPlayIE
-from .kankan import KankanIE
-from .karaoketv import KaraoketvIE
-from .karrierevideos import KarriereVideosIE
-from .keezmovies import KeezMoviesIE
-from .khanacademy import KhanAcademyIE
-from .kickstarter import KickStarterIE
-from .keek import KeekIE
-from .konserthusetplay import KonserthusetPlayIE
-from .kontrtube import KontrTubeIE
-from .krasview import KrasViewIE
-from .ku6 import Ku6IE
-from .kusi import KUSIIE
-from .kuwo import (
-    KuwoIE,
-    KuwoAlbumIE,
-    KuwoChartIE,
-    KuwoSingerIE,
-    KuwoCategoryIE,
-    KuwoMvIE,
-)
-from .la7 import LA7IE
-from .laola1tv import Laola1TvIE
-from .lecture2go import Lecture2GoIE
-from .lemonde import LemondeIE
-from .leeco import (
-    LeIE,
-    LePlaylistIE,
-    LetvCloudIE,
-)
-from .libsyn import LibsynIE
-from .lifenews import (
-    LifeNewsIE,
-    LifeEmbedIE,
-)
-from .limelight import (
-    LimelightMediaIE,
-    LimelightChannelIE,
-    LimelightChannelListIE,
-)
-from .liveleak import LiveLeakIE
-from .livestream import (
-    LivestreamIE,
-    LivestreamOriginalIE,
-    LivestreamShortenerIE,
-)
-from .lnkgo import LnkGoIE
-from .lovehomeporn import LoveHomePornIE
-from .lrt import LRTIE
-from .lynda import (
-    LyndaIE,
-    LyndaCourseIE
-)
-from .m6 import M6IE
-from .macgamestore import MacGameStoreIE
-from .mailru import MailRuIE
-from .makerschannel import MakersChannelIE
-from .makertv import MakerTVIE
-from .malemotion import MalemotionIE
-from .matchtv import MatchTVIE
-from .mdr import MDRIE
-from .metacafe import MetacafeIE
-from .metacritic import MetacriticIE
-from .mgoon import MgoonIE
-from .minhateca import MinhatecaIE
-from .ministrygrid import MinistryGridIE
-from .minoto import MinotoIE
-from .miomio import MioMioIE
-from .mit import TechTVMITIE, MITIE, OCWMITIE
-from .mitele import MiTeleIE
-from .mixcloud import MixcloudIE
-from .mlb import MLBIE
-from .mnet import MnetIE
-from .mpora import MporaIE
-from .moevideo import MoeVideoIE
-from .mofosex import MofosexIE
-from .mojvideo import MojvideoIE
-from .moniker import MonikerIE
-from .mooshare import MooshareIE
-from .morningstar import MorningstarIE
-from .motherless import MotherlessIE
-from .motorsport import MotorsportIE
-from .movieclips import MovieClipsIE
-from .moviezine import MoviezineIE
-from .mtv import (
-    MTVIE,
-    MTVServicesEmbeddedIE,
-    MTVIggyIE,
-    MTVDEIE,
-)
-from .muenchentv import MuenchenTVIE
-from .musicplayon import MusicPlayOnIE
-from .muzu import MuzuTVIE
-from .mwave import MwaveIE
-from .myspace import MySpaceIE, MySpaceAlbumIE
-from .myspass import MySpassIE
-from .myvi import MyviIE
-from .myvideo import MyVideoIE
-from .myvidster import MyVidsterIE
-from .nationalgeographic import (
-    NationalGeographicIE,
-    NationalGeographicChannelIE,
-)
-from .naver import NaverIE
-from .nba import NBAIE
-from .nbc import (
-    CSNNEIE,
-    NBCIE,
-    NBCNewsIE,
-    NBCSportsIE,
-    NBCSportsVPlayerIE,
-    MSNBCIE,
-)
-from .ndr import (
-    NDRIE,
-    NJoyIE,
-    NDREmbedBaseIE,
-    NDREmbedIE,
-    NJoyEmbedIE,
-)
-from .ndtv import NDTVIE
-from .netzkino import NetzkinoIE
-from .nerdcubed import NerdCubedFeedIE
-from .nerdist import NerdistIE
-from .neteasemusic import (
-    NetEaseMusicIE,
-    NetEaseMusicAlbumIE,
-    NetEaseMusicSingerIE,
-    NetEaseMusicListIE,
-    NetEaseMusicMvIE,
-    NetEaseMusicProgramIE,
-    NetEaseMusicDjRadioIE,
-)
-from .newgrounds import NewgroundsIE
-from .newstube import NewstubeIE
-from .nextmedia import (
-    NextMediaIE,
-    NextMediaActionNewsIE,
-    AppleDailyIE,
-)
-from .nextmovie import NextMovieIE
-from .nfb import NFBIE
-from .nfl import NFLIE
-from .nhl import (
-    NHLIE,
-    NHLNewsIE,
-    NHLVideocenterIE,
-)
-from .nick import NickIE
-from .niconico import NiconicoIE, NiconicoPlaylistIE
-from .ninegag import NineGagIE
-from .noco import NocoIE
-from .normalboots import NormalbootsIE
-from .nosvideo import NosVideoIE
-from .nova import NovaIE
-from .novamov import (
-    AuroraVidIE,
-    CloudTimeIE,
-    NowVideoIE,
-    VideoWeedIE,
-    WholeCloudIE,
-)
-from .nowness import (
-    NownessIE,
-    NownessPlaylistIE,
-    NownessSeriesIE,
-)
-from .nowtv import (
-    NowTVIE,
-    NowTVListIE,
-)
-from .noz import NozIE
-from .npo import (
-    NPOIE,
-    NPOLiveIE,
-    NPORadioIE,
-    NPORadioFragmentIE,
-    SchoolTVIE,
-    VPROIE,
-    WNLIE
-)
-from .npr import NprIE
-from .nrk import (
-    NRKIE,
-    NRKPlaylistIE,
-    NRKSkoleIE,
-    NRKTVIE,
-)
-from .ntvde import NTVDeIE
-from .ntvru import NTVRuIE
-from .nytimes import (
-    NYTimesIE,
-    NYTimesArticleIE,
-)
-from .nuvid import NuvidIE
-from .odnoklassniki import OdnoklassnikiIE
-from .oktoberfesttv import OktoberfestTVIE
-from .onionstudios import OnionStudiosIE
-from .ooyala import (
-    OoyalaIE,
-    OoyalaExternalIE,
-)
-from .openload import OpenloadIE
-from .ora import OraTVIE
-from .orf import (
-    ORFTVthekIE,
-    ORFOE1IE,
-    ORFFM4IE,
-    ORFIPTVIE,
-)
-from .pandoratv import PandoraTVIE
-from .parliamentliveuk import ParliamentLiveUKIE
-from .patreon import PatreonIE
-from .pbs import PBSIE
-from .periscope import PeriscopeIE
-from .philharmoniedeparis import PhilharmonieDeParisIE
-from .phoenix import PhoenixIE
-from .photobucket import PhotobucketIE
-from .pinkbike import PinkbikeIE
-from .planetaplay import PlanetaPlayIE
-from .pladform import PladformIE
-from .played import PlayedIE
-from .playfm import PlayFMIE
-from .plays import PlaysTVIE
-from .playtvak import PlaytvakIE
-from .playvid import PlayvidIE
-from .playwire import PlaywireIE
-from .pluralsight import (
-    PluralsightIE,
-    PluralsightCourseIE,
-)
-from .podomatic import PodomaticIE
-from .porn91 import Porn91IE
-from .pornhd import PornHdIE
-from .pornhub import (
-    PornHubIE,
-    PornHubPlaylistIE,
-    PornHubUserVideosIE,
-)
-from .pornotube import PornotubeIE
-from .pornovoisines import PornoVoisinesIE
-from .pornoxo import PornoXOIE
-from .primesharetv import PrimeShareTVIE
-from .promptfile import PromptFileIE
-from .prosiebensat1 import ProSiebenSat1IE
-from .puls4 import Puls4IE
-from .pyvideo import PyvideoIE
-from .qqmusic import (
-    QQMusicIE,
-    QQMusicSingerIE,
-    QQMusicAlbumIE,
-    QQMusicToplistIE,
-    QQMusicPlaylistIE,
-)
-from .quickvid import QuickVidIE
-from .r7 import R7IE
-from .radiode import RadioDeIE
-from .radiojavan import RadioJavanIE
-from .radiobremen import RadioBremenIE
-from .radiofrance import RadioFranceIE
-from .rai import (
-    RaiTVIE,
-    RaiIE,
-)
-from .rbmaradio import RBMARadioIE
-from .rds import RDSIE
-from .redtube import RedTubeIE
-from .regiotv import RegioTVIE
-from .restudy import RestudyIE
-from .reverbnation import ReverbNationIE
-from .revision3 import Revision3IE
-from .rice import RICEIE
-from .ringtv import RingTVIE
-from .ro220 import Ro220IE
-from .rottentomatoes import RottenTomatoesIE
-from .roxwel import RoxwelIE
-from .rtbf import RTBFIE
-from .rte import RteIE, RteRadioIE
-from .rtlnl import RtlNlIE
-from .rtl2 import RTL2IE
-from .rtp import RTPIE
-from .rts import RTSIE
-from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE
-from .rtvnh import RTVNHIE
-from .ruhd import RUHDIE
-from .ruleporn import RulePornIE
-from .rutube import (
-    RutubeIE,
-    RutubeChannelIE,
-    RutubeEmbedIE,
-    RutubeMovieIE,
-    RutubePersonIE,
-)
-from .rutv import RUTVIE
-from .ruutu import RuutuIE
-from .sandia import SandiaIE
-from .safari import (
-    SafariIE,
-    SafariApiIE,
-    SafariCourseIE,
-)
-from .sapo import SapoIE
-from .savefrom import SaveFromIE
-from .sbs import SBSIE
-from .scivee import SciVeeIE
-from .screencast import ScreencastIE
-from .screencastomatic import ScreencastOMaticIE
-from .screenjunkies import ScreenJunkiesIE
-from .screenwavemedia import ScreenwaveMediaIE, TeamFourIE
-from .senateisvp import SenateISVPIE
-from .servingsys import ServingSysIE
-from .sexu import SexuIE
-from .sexykarma import SexyKarmaIE
-from .shahid import ShahidIE
-from .shared import SharedIE
-from .sharesix import ShareSixIE
-from .sina import SinaIE
-from .skynewsarabia import (
-    SkyNewsArabiaIE,
-    SkyNewsArabiaArticleIE,
-)
-from .slideshare import SlideshareIE
-from .slutload import SlutloadIE
-from .smotri import (
-    SmotriIE,
-    SmotriCommunityIE,
-    SmotriUserIE,
-    SmotriBroadcastIE,
-)
-from .snagfilms import (
-    SnagFilmsIE,
-    SnagFilmsEmbedIE,
-)
-from .snotr import SnotrIE
-from .sohu import SohuIE
-from .soundcloud import (
-    SoundcloudIE,
-    SoundcloudSetIE,
-    SoundcloudUserIE,
-    SoundcloudPlaylistIE,
-    SoundcloudSearchIE
-)
-from .soundgasm import (
-    SoundgasmIE,
-    SoundgasmProfileIE
-)
-from .southpark import (
-    SouthParkIE,
-    SouthParkDeIE,
-    SouthParkDkIE,
-    SouthParkEsIE,
-    SouthParkNlIE
-)
-from .spankbang import SpankBangIE
-from .spankwire import SpankwireIE
-from .spiegel import SpiegelIE, SpiegelArticleIE
-from .spiegeltv import SpiegeltvIE
-from .spike import SpikeIE
-from .stitcher import StitcherIE
-from .sport5 import Sport5IE
-from .sportbox import (
-    SportBoxIE,
-    SportBoxEmbedIE,
-)
-from .sportdeutschland import SportDeutschlandIE
-from .srgssr import (
-    SRGSSRIE,
-    SRGSSRPlayIE,
-)
-from .srmediathek import SRMediathekIE
-from .ssa import SSAIE
-from .stanfordoc import StanfordOpenClassroomIE
-from .steam import SteamIE
-from .streamcloud import StreamcloudIE
-from .streamcz import StreamCZIE
-from .streetvoice import StreetVoiceIE
-from .sunporno import SunPornoIE
-from .svt import (
-    SVTIE,
-    SVTPlayIE,
-)
-from .swrmediathek import SWRMediathekIE
-from .syfy import SyfyIE
-from .sztvhu import SztvHuIE
-from .tagesschau import TagesschauIE
-from .tapely import TapelyIE
-from .tass import TassIE
-from .teachertube import (
-    TeacherTubeIE,
-    TeacherTubeUserIE,
-)
-from .teachingchannel import TeachingChannelIE
-from .teamcoco import TeamcocoIE
-from .techtalks import TechTalksIE
-from .ted import TEDIE
-from .tele13 import Tele13IE
-from .telebruxelles import TeleBruxellesIE
-from .telecinco import TelecincoIE
-from .telegraaf import TelegraafIE
-from .telemb import TeleMBIE
-from .teletask import TeleTaskIE
-from .testurl import TestURLIE
-from .tf1 import TF1IE
-from .theintercept import TheInterceptIE
-from .theonion import TheOnionIE
-from .theplatform import (
-    ThePlatformIE,
-    ThePlatformFeedIE,
-)
-from .thescene import TheSceneIE
-from .thesixtyone import TheSixtyOneIE
-from .thestar import TheStarIE
-from .thisamericanlife import ThisAmericanLifeIE
-from .thisav import ThisAVIE
-from .tinypic import TinyPicIE
-from .tlc import TlcDeIE
-from .tmz import (
-    TMZIE,
-    TMZArticleIE,
-)
-from .tnaflix import (
-    TNAFlixNetworkEmbedIE,
-    TNAFlixIE,
-    EMPFlixIE,
-    MovieFapIE,
-)
-from .toggle import ToggleIE
-from .thvideo import (
-    THVideoIE,
-    THVideoPlaylistIE
-)
-from .toutv import TouTvIE
-from .toypics import ToypicsUserIE, ToypicsIE
-from .traileraddict import TrailerAddictIE
-from .trilulilu import TriluliluIE
-from .trollvids import TrollvidsIE
-from .trutube import TruTubeIE
-from .tube8 import Tube8IE
-from .tubitv import TubiTvIE
-from .tudou import (
-    TudouIE,
-    TudouPlaylistIE,
-    TudouAlbumIE,
-)
-from .tumblr import TumblrIE
-from .tunein import (
-    TuneInClipIE,
-    TuneInStationIE,
-    TuneInProgramIE,
-    TuneInTopicIE,
-    TuneInShortenerIE,
-)
-from .turbo import TurboIE
-from .tutv import TutvIE
-from .tv2 import (
-    TV2IE,
-    TV2ArticleIE,
-)
-from .tv3 import TV3IE
-from .tv4 import TV4IE
-from .tvc import (
-    TVCIE,
-    TVCArticleIE,
-)
-from .tvigle import TvigleIE
-from .tvland import TVLandIE
-from .tvp import TvpIE, TvpSeriesIE
-from .tvplay import TVPlayIE
-from .tweakers import TweakersIE
-from .twentyfourvideo import TwentyFourVideoIE
-from .twentymin import TwentyMinutenIE
-from .twentytwotracks import (
-    TwentyTwoTracksIE,
-    TwentyTwoTracksGenreIE
-)
-from .twitch import (
-    TwitchVideoIE,
-    TwitchChapterIE,
-    TwitchVodIE,
-    TwitchProfileIE,
-    TwitchPastBroadcastsIE,
-    TwitchBookmarksIE,
-    TwitchStreamIE,
-)
-from .twitter import (
-    TwitterCardIE,
-    TwitterIE,
-    TwitterAmplifyIE,
-)
-from .ubu import UbuIE
-from .udemy import (
-    UdemyIE,
-    UdemyCourseIE
-)
-from .udn import UDNEmbedIE
-from .digiteka import DigitekaIE
-from .unistra import UnistraIE
-from .urort import UrortIE
-from .usatoday import USATodayIE
-from .ustream import UstreamIE, UstreamChannelIE
-from .ustudio import UstudioIE
-from .varzesh3 import Varzesh3IE
-from .vbox7 import Vbox7IE
-from .veehd import VeeHDIE
-from .veoh import VeohIE
-from .vessel import VesselIE
-from .vesti import VestiIE
-from .vevo import VevoIE
-from .vgtv import (
-    BTArticleIE,
-    BTVestlendingenIE,
-    VGTVIE,
-)
-from .vh1 import VH1IE
-from .vice import (
-    ViceIE,
-    ViceShowIE,
-)
-from .viddler import ViddlerIE
-from .videodetective import VideoDetectiveIE
-from .videofyme import VideofyMeIE
-from .videomega import VideoMegaIE
-from .videomore import (
-    VideomoreIE,
-    VideomoreVideoIE,
-    VideomoreSeasonIE,
-)
-from .videopremium import VideoPremiumIE
-from .videott import VideoTtIE
-from .vidme import (
-    VidmeIE,
-    VidmeUserIE,
-    VidmeUserLikesIE,
-)
-from .vidzi import VidziIE
-from .vier import VierIE, VierVideosIE
-from .viewster import ViewsterIE
-from .viidea import ViideaIE
-from .vimeo import (
-    VimeoIE,
-    VimeoAlbumIE,
-    VimeoChannelIE,
-    VimeoGroupsIE,
-    VimeoLikesIE,
-    VimeoOndemandIE,
-    VimeoReviewIE,
-    VimeoUserIE,
-    VimeoWatchLaterIE,
-)
-from .vimple import VimpleIE
-from .vine import (
-    VineIE,
-    VineUserIE,
-)
-from .viki import (
-    VikiIE,
-    VikiChannelIE,
-)
-from .vk import (
-    VKIE,
-    VKUserVideosIE,
-)
-from .vlive import VLiveIE
-from .vodlocker import VodlockerIE
-from .voicerepublic import VoiceRepublicIE
-from .voxmedia import VoxMediaIE
-from .vporn import VpornIE
-from .vrt import VRTIE
-from .vube import VubeIE
-from .vuclip import VuClipIE
-from .vulture import VultureIE
-from .walla import WallaIE
-from .washingtonpost import WashingtonPostIE
-from .wat import WatIE
-from .wayofthemaster import WayOfTheMasterIE
-from .wdr import (
-    WDRIE,
-    WDRMobileIE,
-    WDRMausIE,
-)
-from .webofstories import (
-    WebOfStoriesIE,
-    WebOfStoriesPlaylistIE,
-)
-from .weibo import WeiboIE
-from .weiqitv import WeiqiTVIE
-from .wimp import WimpIE
-from .wistia import WistiaIE
-from .worldstarhiphop import WorldStarHipHopIE
-from .wrzuta import WrzutaIE
-from .wsj import WSJIE
-from .xbef import XBefIE
-from .xboxclips import XboxClipsIE
-from .xfileshare import XFileShareIE
-from .xhamster import (
-    XHamsterIE,
-    XHamsterEmbedIE,
-)
-from .xminus import XMinusIE
-from .xnxx import XNXXIE
-from .xstream import XstreamIE
-from .xtube import XTubeUserIE, XTubeIE
-from .xuite import XuiteIE
-from .xvideos import XVideosIE
-from .xxxymovies import XXXYMoviesIE
-from .yahoo import (
-    YahooIE,
-    YahooSearchIE,
-)
-from .yam import YamIE
-from .yandexmusic import (
-    YandexMusicTrackIE,
-    YandexMusicAlbumIE,
-    YandexMusicPlaylistIE,
-)
-from .yesjapan import YesJapanIE
-from .yinyuetai import YinYueTaiIE
-from .ynet import YnetIE
-from .youjizz import YouJizzIE
-from .youku import YoukuIE
-from .youporn import YouPornIE
-from .yourupload import YourUploadIE
-from .youtube import (
-    YoutubeIE,
-    YoutubeChannelIE,
-    YoutubeFavouritesIE,
-    YoutubeHistoryIE,
-    YoutubeLiveIE,
-    YoutubePlaylistIE,
-    YoutubePlaylistsIE,
-    YoutubeRecommendedIE,
-    YoutubeSearchDateIE,
-    YoutubeSearchIE,
-    YoutubeSearchURLIE,
-    YoutubeShowIE,
-    YoutubeSubscriptionsIE,
-    YoutubeTruncatedIDIE,
-    YoutubeTruncatedURLIE,
-    YoutubeUserIE,
-    YoutubeWatchLaterIE,
-)
-from .zapiks import ZapiksIE
-from .zdf import ZDFIE, ZDFChannelIE
-from .zingmp3 import (
-    ZingMp3SongIE,
-    ZingMp3AlbumIE,
-)
-from .zippcast import ZippCastIE
-
-_ALL_CLASSES = [
-    klass
-    for name, klass in globals().items()
-    if name.endswith('IE') and name != 'GenericIE'
-]
-_ALL_CLASSES.append(GenericIE)
+try:
+    from .lazy_extractors import *
+    from .lazy_extractors import _ALL_CLASSES
+    _LAZY_LOADER = True
+except ImportError:
+    _LAZY_LOADER = False
+    from .extractors import *
+
+    _ALL_CLASSES = [
+        klass
+        for name, klass in globals().items()
+        if name.endswith('IE') and name != 'GenericIE'
+    ]
+    _ALL_CLASSES.append(GenericIE)
+
+
+def gen_extractor_classes():
+    """ Return a list of supported extractors.
+    The order does matter; the first extractor matched is the one handling the URL.
+    """
+    return _ALL_CLASSES
 
 
 def gen_extractors():
     """ Return a list of an instance of every supported extractor.
     The order does matter; the first extractor matched is the one handling the URL.
     """
-    return [klass() for klass in _ALL_CLASSES]
+    return [klass() for klass in gen_extractor_classes()]
 
 
 def list_extractors(age_limit):
index 92eee8119da420a78782d5eea05a2d3564468d3a..94ce88c834f5ce1575b36f839f02fdf43f96e046 100644 (file)
@@ -2,10 +2,14 @@
 from __future__ import unicode_literals
 
 import re
+import functools
 
 from .common import InfoExtractor
 from ..compat import compat_str
-from ..utils import int_or_none
+from ..utils import (
+    int_or_none,
+    OnDemandPagedList,
+)
 
 
 class ACastIE(InfoExtractor):
@@ -26,13 +30,8 @@ class ACastIE(InfoExtractor):
 
     def _real_extract(self, url):
         channel, display_id = re.match(self._VALID_URL, url).groups()
-
-        embed_page = self._download_webpage(
-            re.sub('(?:www\.)?acast\.com', 'embedcdn.acast.com', url), display_id)
-        cast_data = self._parse_json(self._search_regex(
-            r'window\[\'acast/queries\'\]\s*=\s*([^;]+);', embed_page, 'acast data'),
-            display_id)['GetAcast/%s/%s' % (channel, display_id)]
-
+        cast_data = self._download_json(
+            'https://embed.acast.com/api/acasts/%s/%s' % (channel, display_id), display_id)
         return {
             'id': compat_str(cast_data['id']),
             'display_id': display_id,
@@ -58,15 +57,26 @@ class ACastChannelIE(InfoExtractor):
         'playlist_mincount': 20,
     }
     _API_BASE_URL = 'https://www.acast.com/api/'
+    _PAGE_SIZE = 10
 
     @classmethod
     def suitable(cls, url):
         return False if ACastIE.suitable(url) else super(ACastChannelIE, cls).suitable(url)
 
-    def _real_extract(self, url):
-        display_id = self._match_id(url)
-        channel_data = self._download_json(self._API_BASE_URL + 'channels/%s' % display_id, display_id)
-        casts = self._download_json(self._API_BASE_URL + 'channels/%s/acasts' % display_id, display_id)
-        entries = [self.url_result('https://www.acast.com/%s/%s' % (display_id, cast['url']), 'ACast') for cast in casts]
+    def _fetch_page(self, channel_slug, page):
+        casts = self._download_json(
+            self._API_BASE_URL + 'channels/%s/acasts?page=%s' % (channel_slug, page),
+            channel_slug, note='Download page %d of channel data' % page)
+        for cast in casts:
+            yield self.url_result(
+                'https://www.acast.com/%s/%s' % (channel_slug, cast['url']),
+                'ACast', cast['id'])
 
-        return self.playlist_result(entries, compat_str(channel_data['id']), channel_data['name'], channel_data.get('description'))
+    def _real_extract(self, url):
+        channel_slug = self._match_id(url)
+        channel_data = self._download_json(
+            self._API_BASE_URL + 'channels/%s' % channel_slug, channel_slug)
+        entries = OnDemandPagedList(functools.partial(
+            self._fetch_page, channel_slug), self._PAGE_SIZE)
+        return self.playlist_result(entries, compat_str(
+            channel_data['id']), channel_data['name'], channel_data.get('description'))
index 95a99c6b0d567c52b477a1964d9c055d0b0a6b8a..24df8fe9305e7df0487965ed03756305feca3dea 100644 (file)
+# coding: utf-8
 from __future__ import unicode_literals
 
+import re
+
 from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    int_or_none,
+)
 
 
 class AolIE(InfoExtractor):
     IE_NAME = 'on.aol.com'
-    _VALID_URL = r'(?:aol-video:|https?://on\.aol\.com/video/.*-)(?P<id>[0-9]+)(?:$|\?)'
+    _VALID_URL = r'(?:aol-video:|https?://on\.aol\.com/.*-)(?P<id>[^/?-]+)'
 
     _TESTS = [{
+        # video with 5min ID
         'url': 'http://on.aol.com/video/u-s--official-warns-of-largest-ever-irs-phone-scam-518167793?icid=OnHomepageC2Wide_MustSee_Img',
         'md5': '18ef68f48740e86ae94b98da815eec42',
         'info_dict': {
             'id': '518167793',
             'ext': 'mp4',
             'title': 'U.S. Official Warns Of \'Largest Ever\' IRS Phone Scam',
+            'description': 'A major phone scam has cost thousands of taxpayers more than $1 million, with less than a month until income tax returns are due to the IRS.',
+            'timestamp': 1395405060,
+            'upload_date': '20140321',
+            'uploader': 'Newsy Studio',
         },
-        'add_ie': ['FiveMin'],
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        }
+    }, {
+        # video with vidible ID
+        'url': 'http://on.aol.com/video/netflix-is-raising-rates-5707d6b8e4b090497b04f706?context=PC:homepage:PL1944:1460189336183',
+        'info_dict': {
+            'id': '5707d6b8e4b090497b04f706',
+            'ext': 'mp4',
+            'title': 'Netflix is Raising Rates',
+            'description': 'Netflix is rewarding millions of it’s long-standing members with an increase in cost. Veuer’s Carly Figueroa has more.',
+            'upload_date': '20160408',
+            'timestamp': 1460123280,
+            'uploader': 'Veuer',
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        }
+    }, {
+        'url': 'http://on.aol.com/partners/abc-551438d309eab105804dbfe8/sneak-peek-was-haley-really-framed-570eaebee4b0448640a5c944',
+        'only_matching': True,
+    }, {
+        'url': 'http://on.aol.com/shows/park-bench-shw518173474-559a1b9be4b0c3bfad3357a7?context=SH:SHW518173474:PL4327:1460619712763',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
-        return self.url_result('5min:%s' % video_id)
+
+        response = self._download_json(
+            'https://feedapi.b2c.on.aol.com/v1.0/app/videos/aolon/%s/details' % video_id,
+            video_id)['response']
+        if response['statusText'] != 'Ok':
+            raise ExtractorError('%s said: %s' % (self.IE_NAME, response['statusText']), expected=True)
+
+        video_data = response['data']
+        formats = []
+        m3u8_url = video_data.get('videoMasterPlaylist')
+        if m3u8_url:
+            formats.extend(self._extract_m3u8_formats(
+                m3u8_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
+        for rendition in video_data.get('renditions', []):
+            video_url = rendition.get('url')
+            if not video_url:
+                continue
+            ext = rendition.get('format')
+            if ext == 'm3u8':
+                formats.extend(self._extract_m3u8_formats(
+                    video_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
+            else:
+                f = {
+                    'url': video_url,
+                    'format_id': rendition.get('quality'),
+                }
+                mobj = re.search(r'(\d+)x(\d+)', video_url)
+                if mobj:
+                    f.update({
+                        'width': int(mobj.group(1)),
+                        'height': int(mobj.group(2)),
+                    })
+                formats.append(f)
+        self._sort_formats(formats, ('width', 'height', 'tbr', 'format_id'))
+
+        return {
+            'id': video_id,
+            'title': video_data['title'],
+            'duration': int_or_none(video_data.get('duration')),
+            'timestamp': int_or_none(video_data.get('publishDate')),
+            'view_count': int_or_none(video_data.get('views')),
+            'description': video_data.get('description'),
+            'uploader': video_data.get('videoOwner'),
+            'formats': formats,
+        }
 
 
 class AolFeaturesIE(InfoExtractor):
index 9fb84911a0b81fd42de2c9bd410cdaf2dd4813a6..26446c2fe1e4ecb0b15b6ec87a927a2b6151a1da 100644 (file)
@@ -83,7 +83,7 @@ class ARDMediathekIE(InfoExtractor):
         subtitle_url = media_info.get('_subtitleUrl')
         if subtitle_url:
             subtitles['de'] = [{
-                'ext': 'srt',
+                'ext': 'ttml',
                 'url': subtitle_url,
             }]
 
index ae0f27dcbe059c0d469eaeca243ef59400ff68d6..a9e3266dcb138794774e30ad2c0af0dea645463f 100644 (file)
@@ -210,7 +210,7 @@ class ArteTVPlus7IE(InfoExtractor):
 # It also uses the arte_vp_url url from the webpage to extract the information
 class ArteTVCreativeIE(ArteTVPlus7IE):
     IE_NAME = 'arte.tv:creative'
-    _VALID_URL = r'https?://creative\.arte\.tv/(?P<lang>fr|de|en|es)/(?:magazine?/)?(?P<id>[^/?#&]+)'
+    _VALID_URL = r'https?://creative\.arte\.tv/(?P<lang>fr|de|en|es)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
 
     _TESTS = [{
         'url': 'http://creative.arte.tv/de/magazin/agentur-amateur-corporate-design',
@@ -229,9 +229,27 @@ class ArteTVCreativeIE(ArteTVPlus7IE):
             'description': 'Événement ! Quarante-cinq ans après leurs premiers succès, les légendaires Monty Python remontent sur scène.\n',
             'upload_date': '20140805',
         }
+    }, {
+        'url': 'http://creative.arte.tv/de/episode/agentur-amateur-4-der-erste-kunde',
+        'only_matching': True,
     }]
 
 
+class ArteTVInfoIE(ArteTVPlus7IE):
+    IE_NAME = 'arte.tv:info'
+    _VALID_URL = r'https?://info\.arte\.tv/(?P<lang>fr|de|en|es)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
+
+    _TEST = {
+        'url': 'http://info.arte.tv/fr/service-civique-un-cache-misere',
+        'info_dict': {
+            'id': '067528-000-A',
+            'ext': 'mp4',
+            'title': 'Service civique, un cache misère ?',
+            'upload_date': '20160403',
+        },
+    }
+
+
 class ArteTVFutureIE(ArteTVPlus7IE):
     IE_NAME = 'arte.tv:future'
     _VALID_URL = r'https?://future\.arte\.tv/(?P<lang>fr|de|en|es)/(?P<id>[^/?#&]+)'
@@ -337,7 +355,7 @@ class ArteTVEmbedIE(ArteTVPlus7IE):
     IE_NAME = 'arte.tv:embed'
     _VALID_URL = r'''(?x)
         http://www\.arte\.tv
-        /playerv2/embed\.php\?json_url=
+        /(?:playerv2/embed|arte_vp/index)\.php\?json_url=
         (?P<json_url>
             http://arte\.tv/papi/tvguide/videos/stream/player/
             (?P<lang>[^/]+)/(?P<id>[^/]+)[^&]*
index 3eed91279fd7b6ead45bf3ee01486eab53b91d6a..a52d26cecd1e98f8d4a902ed4e8051a42e21e200 100644 (file)
@@ -30,14 +30,14 @@ class AudiomackIE(InfoExtractor):
         # audiomack wrapper around soundcloud song
         {
             'add_ie': ['Soundcloud'],
-            'url': 'http://www.audiomack.com/song/xclusiveszone/take-kare',
+            'url': 'http://www.audiomack.com/song/hip-hop-daily/black-mamba-freestyle',
             'info_dict': {
-                'id': '172419696',
+                'id': '258901379',
                 'ext': 'mp3',
-                'description': 'md5:1fc3272ed7a635cce5be1568c2822997',
-                'title': 'Young Thug ft Lil Wayne - Take Kare',
-                'uploader': 'Young Thug World',
-                'upload_date': '20141016',
+                'description': 'mamba day freestyle for the legend Kobe Bryant ',
+                'title': 'Black Mamba Freestyle [Prod. By Danny Wolf]',
+                'uploader': 'ILOVEMAKONNEN',
+                'upload_date': '20160414',
             }
         },
     ]
index 425f08f2b6bba12f054f13aa0de6eefddcebcfa7..74c4510f9b4522b0a914cdf1621bff832ac94638 100644 (file)
@@ -671,6 +671,7 @@ class BBCIE(BBCCoUkIE):
         'info_dict': {
             'id': '34475836',
             'title': 'Jurgen Klopp: Furious football from a witty and winning coach',
+            'description': 'Fast-paced football, wit, wisdom and a ready smile - why Liverpool fans should come to love new boss Jurgen Klopp.',
         },
         'playlist_count': 3,
     }, {
index c718cf38509507ce6f314571dd59444ae4f3c647..f0781fc273a18ec30c1ffa97546232d991ad8574 100644 (file)
@@ -340,7 +340,7 @@ class BrightcoveLegacyIE(InfoExtractor):
                         ext = 'flv'
                 if ext is None:
                     ext = determine_ext(url)
-                tbr = int_or_none(rend.get('encodingRate'), 1000),
+                tbr = int_or_none(rend.get('encodingRate'), 1000)
                 a_format = {
                     'format_id': 'http%s' % ('-%s' % tbr if tbr else ''),
                     'url': url,
index c621a08d54a2dc405e1863345941485439691fe3..051d783a23cc7c0b5858af0c24f63187181cd276 100644 (file)
@@ -5,7 +5,6 @@ from ..utils import (
     xpath_text,
     xpath_element,
     int_or_none,
-    ExtractorError,
     find_xpath_attr,
 )
 
@@ -64,7 +63,7 @@ class CBSIE(CBSBaseIE):
         'url': 'http://www.colbertlateshow.com/podcasts/dYSwjqPs_X1tvbV_P2FcPWRa_qT6akTC/in-the-bad-room-with-stephen/',
         'only_matching': True,
     }]
-    TP_RELEASE_URL_TEMPLATE = 'http://link.theplatform.com/s/dJ5BDC/%s?manifest=m3u&mbr=true'
+    TP_RELEASE_URL_TEMPLATE = 'http://link.theplatform.com/s/dJ5BDC/%s?mbr=true'
 
     def _real_extract(self, url):
         display_id = self._match_id(url)
@@ -84,11 +83,11 @@ class CBSIE(CBSBaseIE):
             pid = xpath_text(item, 'pid')
             if not pid:
                 continue
-            try:
-                tp_formats, tp_subtitles = self._extract_theplatform_smil(
-                    self.TP_RELEASE_URL_TEMPLATE % pid, content_id, 'Downloading %s SMIL data' % pid)
-            except ExtractorError:
-                continue
+            tp_release_url = self.TP_RELEASE_URL_TEMPLATE % pid
+            if '.m3u8' in xpath_text(item, 'contentUrl', default=''):
+                tp_release_url += '&manifest=m3u'
+            tp_formats, tp_subtitles = self._extract_theplatform_smil(
+                tp_release_url, content_id, 'Downloading %s SMIL data' % pid)
             formats.extend(tp_formats)
             subtitles = self._merge_subtitles(subtitles, tp_subtitles)
         self._sort_formats(formats)
diff --git a/youtube_dl/extractor/cliprs.py b/youtube_dl/extractor/cliprs.py
new file mode 100644 (file)
index 0000000..4f9320e
--- /dev/null
@@ -0,0 +1,90 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    float_or_none,
+    int_or_none,
+    parse_iso8601,
+)
+
+
+class ClipRsIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?clip\.rs/(?P<id>[^/]+)/\d+'
+    _TEST = {
+        'url': 'http://www.clip.rs/premijera-frajle-predstavljaju-novi-spot-za-pesmu-moli-me-moli/3732',
+        'md5': 'c412d57815ba07b56f9edc7b5d6a14e5',
+        'info_dict': {
+            'id': '1488842.1399140381',
+            'ext': 'mp4',
+            'title': 'PREMIJERA Frajle predstavljaju novi spot za pesmu Moli me, moli',
+            'description': 'md5:56ce2c3b4ab31c5a2e0b17cb9a453026',
+            'duration': 229,
+            'timestamp': 1459850243,
+            'upload_date': '20160405',
+        }
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+
+        video_id = self._search_regex(
+            r'id=(["\'])mvp:(?P<id>.+?)\1', webpage, 'mvp id', group='id')
+
+        response = self._download_json(
+            'http://qi.ckm.onetapi.pl/', video_id,
+            query={
+                'body[id]': video_id,
+                'body[jsonrpc]': '2.0',
+                'body[method]': 'get_asset_detail',
+                'body[params][ID_Publikacji]': video_id,
+                'body[params][Service]': 'www.onet.pl',
+                'content-type': 'application/jsonp',
+                'x-onet-app': 'player.front.onetapi.pl',
+            })
+
+        error = response.get('error')
+        if error:
+            raise ExtractorError(
+                '%s said: %s' % (self.IE_NAME, error['message']), expected=True)
+
+        video = response['result'].get('0')
+
+        formats = []
+        for _, formats_dict in video['formats'].items():
+            if not isinstance(formats_dict, dict):
+                continue
+            for format_id, format_list in formats_dict.items():
+                if not isinstance(format_list, list):
+                    continue
+                for f in format_list:
+                    if not f.get('url'):
+                        continue
+                    formats.append({
+                        'url': f['url'],
+                        'format_id': format_id,
+                        'height': int_or_none(f.get('vertical_resolution')),
+                        'width': int_or_none(f.get('horizontal_resolution')),
+                        'abr': float_or_none(f.get('audio_bitrate')),
+                        'vbr': float_or_none(f.get('video_bitrate')),
+                    })
+        self._sort_formats(formats)
+
+        meta = video.get('meta', {})
+
+        title = self._og_search_title(webpage, default=None) or meta['title']
+        description = self._og_search_description(webpage, default=None) or meta.get('description')
+        duration = meta.get('length') or meta.get('lenght')
+        timestamp = parse_iso8601(meta.get('addDate'), ' ')
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'duration': duration,
+            'timestamp': timestamp,
+            'formats': formats,
+        }
index 2b40f3b7c70e2440401ca73f95fe733f74301cae..02cd2c003ce0ab7b305d67e4f0e9c61304795620 100644 (file)
@@ -376,14 +376,13 @@ class InfoExtractor(object):
                 self.to_screen('%s' % (note,))
             else:
                 self.to_screen('%s: %s' % (video_id, note))
-        # data, headers and query params will be ignored for `Request` objects
         if isinstance(url_or_request, compat_urllib_request.Request):
             url_or_request = update_Request(
                 url_or_request, data=data, headers=headers, query=query)
         else:
             if query:
                 url_or_request = update_url_query(url_or_request, query)
-            if data or headers:
+            if data is not None or headers:
                 url_or_request = sanitized_Request(url_or_request, data, headers)
         try:
             return self._downloader.urlopen(url_or_request)
@@ -843,7 +842,7 @@ class InfoExtractor(object):
         for input in re.findall(r'(?i)<input([^>]+)>', html):
             if not re.search(r'type=(["\'])(?:hidden|submit)\1', input):
                 continue
-            name = re.search(r'name=(["\'])(?P<value>.+?)\1', input)
+            name = re.search(r'(?:name|id)=(["\'])(?P<value>.+?)\1', input)
             if not name:
                 continue
             value = re.search(r'value=(["\'])(?P<value>.*?)\1', input)
@@ -1534,7 +1533,7 @@ class InfoExtractor(object):
                             media_template = representation_ms_info['media_template']
                             media_template = media_template.replace('$RepresentationID$', representation_id)
                             media_template = re.sub(r'\$(Number|Bandwidth)\$', r'%(\1)d', media_template)
-                            media_template = re.sub(r'\$(Number|Bandwidth)%(\d+)\$', r'%(\1)\2d', media_template)
+                            media_template = re.sub(r'\$(Number|Bandwidth)%([^$]+)\$', r'%(\1)\2', media_template)
                             media_template.replace('$$', '$')
                             representation_ms_info['segment_urls'] = [
                                 media_template % {
index 6cd395e1169d8253c589efe3da2d24f1632b0356..65a98d7892816e36608a3350cc00db9d7efd4cb9 100644 (file)
@@ -17,37 +17,53 @@ class DemocracynowIE(InfoExtractor):
     IE_NAME = 'democracynow'
     _TESTS = [{
         'url': 'http://www.democracynow.org/shows/2015/7/3',
-        'md5': 'fbb8fe3d7a56a5e12431ce2f9b2fab0d',
+        'md5': '3757c182d3d84da68f5c8f506c18c196',
         'info_dict': {
             'id': '2015-0703-001',
             'ext': 'mp4',
-            'title': 'July 03, 2015 - Democracy Now!',
-            'description': 'A daily independent global news hour with Amy Goodman & Juan González "What to the Slave is 4th of July?": James Earl Jones Reads Frederick Douglass\u2019 Historic Speech : "This Flag Comes Down Today": Bree Newsome Scales SC Capitol Flagpole, Takes Down Confederate Flag : "We Shall Overcome": Remembering Folk Icon, Activist Pete Seeger in His Own Words & Songs',
+            'title': 'Daily Show',
         },
     }, {
         'url': 'http://www.democracynow.org/2015/7/3/this_flag_comes_down_today_bree',
-        'md5': 'fbb8fe3d7a56a5e12431ce2f9b2fab0d',
         'info_dict': {
             'id': '2015-0703-001',
             'ext': 'mp4',
             'title': '"This Flag Comes Down Today": Bree Newsome Scales SC Capitol Flagpole, Takes Down Confederate Flag',
             'description': 'md5:4d2bc4f0d29f5553c2210a4bc7761a21',
         },
+        'params': {
+            'skip_download': True,
+        },
     }]
 
     def _real_extract(self, url):
         display_id = self._match_id(url)
+
         webpage = self._download_webpage(url, display_id)
-        description = self._og_search_description(webpage)
 
         json_data = self._parse_json(self._search_regex(
             r'<script[^>]+type="text/json"[^>]*>\s*({[^>]+})', webpage, 'json'),
             display_id)
-        video_id = None
+
+        title = json_data['title']
         formats = []
 
-        default_lang = 'en'
+        video_id = None
+
+        for key in ('file', 'audio', 'video', 'high_res_video'):
+            media_url = json_data.get(key, '')
+            if not media_url:
+                continue
+            media_url = re.sub(r'\?.*', '', compat_urlparse.urljoin(url, media_url))
+            video_id = video_id or remove_start(os.path.splitext(url_basename(media_url))[0], 'dn')
+            formats.append({
+                'url': media_url,
+                'vcodec': 'none' if key == 'audio' else None,
+            })
+
+        self._sort_formats(formats)
 
+        default_lang = 'en'
         subtitles = {}
 
         def add_subtitle_item(lang, info_dict):
@@ -67,22 +83,13 @@ class DemocracynowIE(InfoExtractor):
                 'url': compat_urlparse.urljoin(url, subtitle_item['url']),
             })
 
-        for key in ('file', 'audio', 'video'):
-            media_url = json_data.get(key, '')
-            if not media_url:
-                continue
-            media_url = re.sub(r'\?.*', '', compat_urlparse.urljoin(url, media_url))
-            video_id = video_id or remove_start(os.path.splitext(url_basename(media_url))[0], 'dn')
-            formats.append({
-                'url': media_url,
-            })
-
-        self._sort_formats(formats)
+        description = self._og_search_description(webpage, default=None)
 
         return {
             'id': video_id or display_id,
-            'title': json_data['title'],
+            'title': title,
             'description': description,
+            'thumbnail': json_data.get('image'),
             'subtitles': subtitles,
             'formats': formats,
         }
diff --git a/youtube_dl/extractor/dispeak.py b/youtube_dl/extractor/dispeak.py
new file mode 100644 (file)
index 0000000..c74fa7e
--- /dev/null
@@ -0,0 +1,114 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    int_or_none,
+    parse_duration,
+    remove_end,
+    xpath_element,
+    xpath_text,
+)
+
+
+class DigitalSpeakingIE(InfoExtractor):
+    _VALID_URL = r'http://(?:evt\.dispeak|events\.digitallyspeaking)\.com/([^/]+/)+xml/(?P<id>[^.]+).xml'
+
+    _TESTS = [{
+        # From http://evt.dispeak.com/ubm/gdc/sf16/xml/840376_BQRC.xml
+        'url': 'http://evt.dispeak.com/ubm/gdc/sf16/xml/840376_BQRC.xml',
+        'md5': 'a8efb6c31ed06ca8739294960b2dbabd',
+        'info_dict': {
+            'id': '840376_BQRC',
+            'ext': 'mp4',
+            'title': 'Tenacious Design and The Interface of \'Destiny\'',
+        },
+    }, {
+        # From http://www.gdcvault.com/play/1014631/Classic-Game-Postmortem-PAC
+        'url': 'http://events.digitallyspeaking.com/gdc/sf11/xml/12396_1299111843500GMPX.xml',
+        'only_matching': True,
+    }]
+
+    def _parse_mp4(self, metadata):
+        video_formats = []
+        video_root = None
+
+        mp4_video = xpath_text(metadata, './mp4video', default=None)
+        if mp4_video is not None:
+            mobj = re.match(r'(?P<root>https?://.*?/).*', mp4_video)
+            video_root = mobj.group('root')
+        if video_root is None:
+            http_host = xpath_text(metadata, 'httpHost', default=None)
+            if http_host:
+                video_root = 'http://%s/' % http_host
+        if video_root is None:
+            # Hard-coded in http://evt.dispeak.com/ubm/gdc/sf16/custom/player2.js
+            # Works for GPUTechConf, too
+            video_root = 'http://s3-2u.digitallyspeaking.com/'
+
+        formats = metadata.findall('./MBRVideos/MBRVideo')
+        if not formats:
+            return None
+        for a_format in formats:
+            stream_name = xpath_text(a_format, 'streamName', fatal=True)
+            video_path = re.match(r'mp4\:(?P<path>.*)', stream_name).group('path')
+            url = video_root + video_path
+            vbr = xpath_text(a_format, 'bitrate')
+            video_formats.append({
+                'url': url,
+                'vbr': int_or_none(vbr),
+            })
+        return video_formats
+
+    def _parse_flv(self, metadata):
+        formats = []
+        akamai_url = xpath_text(metadata, './akamaiHost', fatal=True)
+        audios = metadata.findall('./audios/audio')
+        for audio in audios:
+            formats.append({
+                'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url,
+                'play_path': remove_end(audio.get('url'), '.flv'),
+                'ext': 'flv',
+                'vcodec': 'none',
+                'format_id': audio.get('code'),
+            })
+        slide_video_path = xpath_text(metadata, './slideVideo', fatal=True)
+        formats.append({
+            'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url,
+            'play_path': remove_end(slide_video_path, '.flv'),
+            'ext': 'flv',
+            'format_note': 'slide deck video',
+            'quality': -2,
+            'preference': -2,
+            'format_id': 'slides',
+        })
+        speaker_video_path = xpath_text(metadata, './speakerVideo', fatal=True)
+        formats.append({
+            'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url,
+            'play_path': remove_end(speaker_video_path, '.flv'),
+            'ext': 'flv',
+            'format_note': 'speaker video',
+            'quality': -1,
+            'preference': -1,
+            'format_id': 'speaker',
+        })
+        return formats
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        xml_description = self._download_xml(url, video_id)
+        metadata = xpath_element(xml_description, 'metadata')
+
+        video_formats = self._parse_mp4(metadata)
+        if video_formats is None:
+            video_formats = self._parse_flv(metadata)
+
+        return {
+            'id': video_id,
+            'formats': video_formats,
+            'title': xpath_text(metadata, 'title', fatal=True),
+            'duration': parse_duration(xpath_text(metadata, 'endTime')),
+            'creator': xpath_text(metadata, 'speaker'),
+        }
index 7bbf617d468f9d5295ee2e1123452679a21a7e4b..fa3cb70231f57d3b6b11e31d2ebce70d5267690f 100644 (file)
@@ -4,6 +4,7 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
+from ..compat import compat_HTTPError
 from ..utils import (
     ExtractorError,
     int_or_none,
@@ -55,8 +56,13 @@ class EaglePlatformIE(InfoExtractor):
             raise ExtractorError(' '.join(response['errors']), expected=True)
 
     def _download_json(self, url_or_request, video_id, note='Downloading JSON metadata'):
-        response = super(EaglePlatformIE, self)._download_json(url_or_request, video_id, note)
-        self._handle_error(response)
+        try:
+            response = super(EaglePlatformIE, self)._download_json(url_or_request, video_id, note)
+        except ExtractorError as ee:
+            if isinstance(ee.cause, compat_HTTPError):
+                response = self._parse_json(ee.cause.read().decode('utf-8'), video_id)
+                self._handle_error(response)
+            raise
         return response
 
     def _get_video_url(self, url_or_request, video_id, note='Downloading JSON metadata'):
index b6bfd2b2dedc5388ef383a3cd8853bbb0c541f68..c97682cd367edebfd9fc6a476ad073cb03240054 100644 (file)
@@ -4,10 +4,10 @@ from .common import InfoExtractor
 
 
 class EbaumsWorldIE(InfoExtractor):
-    _VALID_URL = r'https?://www\.ebaumsworld\.com/video/watch/(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?ebaumsworld\.com/videos/[^/]+/(?P<id>\d+)'
 
     _TEST = {
-        'url': 'http://www.ebaumsworld.com/video/watch/83367677/',
+        'url': 'http://www.ebaumsworld.com/videos/a-giant-python-opens-the-door/83367677/',
         'info_dict': {
             'id': '83367677',
             'ext': 'mp4',
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
new file mode 100644 (file)
index 0000000..d28f9e8
--- /dev/null
@@ -0,0 +1,996 @@
+# flake8: noqa
+from __future__ import unicode_literals
+
+from .abc import ABCIE
+from .abc7news import Abc7NewsIE
+from .academicearth import AcademicEarthCourseIE
+from .acast import (
+    ACastIE,
+    ACastChannelIE,
+)
+from .addanime import AddAnimeIE
+from .adobetv import (
+    AdobeTVIE,
+    AdobeTVShowIE,
+    AdobeTVChannelIE,
+    AdobeTVVideoIE,
+)
+from .adultswim import AdultSwimIE
+from .aenetworks import AENetworksIE
+from .aftonbladet import AftonbladetIE
+from .airmozilla import AirMozillaIE
+from .aljazeera import AlJazeeraIE
+from .alphaporno import AlphaPornoIE
+from .animeondemand import AnimeOnDemandIE
+from .anitube import AnitubeIE
+from .anysex import AnySexIE
+from .aol import (
+    AolIE,
+    AolFeaturesIE,
+)
+from .allocine import AllocineIE
+from .aparat import AparatIE
+from .appleconnect import AppleConnectIE
+from .appletrailers import (
+    AppleTrailersIE,
+    AppleTrailersSectionIE,
+)
+from .archiveorg import ArchiveOrgIE
+from .ard import (
+    ARDIE,
+    ARDMediathekIE,
+    SportschauIE,
+)
+from .arte import (
+    ArteTvIE,
+    ArteTVPlus7IE,
+    ArteTVCreativeIE,
+    ArteTVConcertIE,
+    ArteTVInfoIE,
+    ArteTVFutureIE,
+    ArteTVCinemaIE,
+    ArteTVDDCIE,
+    ArteTVMagazineIE,
+    ArteTVEmbedIE,
+)
+from .atresplayer import AtresPlayerIE
+from .atttechchannel import ATTTechChannelIE
+from .audimedia import AudiMediaIE
+from .audioboom import AudioBoomIE
+from .audiomack import AudiomackIE, AudiomackAlbumIE
+from .azubu import AzubuIE, AzubuLiveIE
+from .baidu import BaiduVideoIE
+from .bambuser import BambuserIE, BambuserChannelIE
+from .bandcamp import BandcampIE, BandcampAlbumIE
+from .bbc import (
+    BBCCoUkIE,
+    BBCCoUkArticleIE,
+    BBCIE,
+)
+from .beeg import BeegIE
+from .behindkink import BehindKinkIE
+from .beatportpro import BeatportProIE
+from .bet import BetIE
+from .bigflix import BigflixIE
+from .bild import BildIE
+from .bilibili import BiliBiliIE
+from .biobiochiletv import BioBioChileTVIE
+from .bleacherreport import (
+    BleacherReportIE,
+    BleacherReportCMSIE,
+)
+from .blinkx import BlinkxIE
+from .bloomberg import BloombergIE
+from .bokecc import BokeCCIE
+from .bpb import BpbIE
+from .br import BRIE
+from .bravotv import BravoTVIE
+from .breakcom import BreakIE
+from .brightcove import (
+    BrightcoveLegacyIE,
+    BrightcoveNewIE,
+)
+from .buzzfeed import BuzzFeedIE
+from .byutv import BYUtvIE
+from .c56 import C56IE
+from .camdemy import (
+    CamdemyIE,
+    CamdemyFolderIE
+)
+from .camwithher import CamWithHerIE
+from .canalplus import CanalplusIE
+from .canalc2 import Canalc2IE
+from .canvas import CanvasIE
+from .cbc import (
+    CBCIE,
+    CBCPlayerIE,
+)
+from .cbs import CBSIE
+from .cbsinteractive import CBSInteractiveIE
+from .cbsnews import (
+    CBSNewsIE,
+    CBSNewsLiveVideoIE,
+)
+from .cbssports import CBSSportsIE
+from .ccc import CCCIE
+from .cda import CDAIE
+from .ceskatelevize import CeskaTelevizeIE
+from .channel9 import Channel9IE
+from .chaturbate import ChaturbateIE
+from .chilloutzone import ChilloutzoneIE
+from .chirbit import (
+    ChirbitIE,
+    ChirbitProfileIE,
+)
+from .cinchcast import CinchcastIE
+from .cinemassacre import CinemassacreIE
+from .cliprs import ClipRsIE
+from .clipfish import ClipfishIE
+from .cliphunter import CliphunterIE
+from .clipsyndicate import ClipsyndicateIE
+from .cloudy import CloudyIE
+from .clubic import ClubicIE
+from .clyp import ClypIE
+from .cmt import CMTIE
+from .cnbc import CNBCIE
+from .cnn import (
+    CNNIE,
+    CNNBlogsIE,
+    CNNArticleIE,
+)
+from .collegehumor import CollegeHumorIE
+from .collegerama import CollegeRamaIE
+from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE
+from .comcarcoff import ComCarCoffIE
+from .commonmistakes import CommonMistakesIE, UnicodeBOMIE
+from .commonprotocols import RtmpIE
+from .condenast import CondeNastIE
+from .cracked import CrackedIE
+from .crackle import CrackleIE
+from .criterion import CriterionIE
+from .crooksandliars import CrooksAndLiarsIE
+from .crunchyroll import (
+    CrunchyrollIE,
+    CrunchyrollShowPlaylistIE
+)
+from .cspan import CSpanIE
+from .ctsnews import CtsNewsIE
+from .cultureunplugged import CultureUnpluggedIE
+from .cwtv import CWTVIE
+from .dailymotion import (
+    DailymotionIE,
+    DailymotionPlaylistIE,
+    DailymotionUserIE,
+    DailymotionCloudIE,
+)
+from .daum import (
+    DaumIE,
+    DaumClipIE,
+    DaumPlaylistIE,
+    DaumUserIE,
+)
+from .dbtv import DBTVIE
+from .dcn import (
+    DCNIE,
+    DCNVideoIE,
+    DCNLiveIE,
+    DCNSeasonIE,
+)
+from .dctp import DctpTvIE
+from .deezer import DeezerPlaylistIE
+from .democracynow import DemocracynowIE
+from .dfb import DFBIE
+from .dhm import DHMIE
+from .dotsub import DotsubIE
+from .douyutv import DouyuTVIE
+from .dplay import DPlayIE
+from .dramafever import (
+    DramaFeverIE,
+    DramaFeverSeriesIE,
+)
+from .dreisat import DreiSatIE
+from .drbonanza import DRBonanzaIE
+from .drtuber import DrTuberIE
+from .drtv import DRTVIE
+from .dvtv import DVTVIE
+from .dump import DumpIE
+from .dumpert import DumpertIE
+from .defense import DefenseGouvFrIE
+from .discovery import DiscoveryIE
+from .dispeak import DigitalSpeakingIE
+from .dropbox import DropboxIE
+from .dw import (
+    DWIE,
+    DWArticleIE,
+)
+from .eagleplatform import EaglePlatformIE
+from .ebaumsworld import EbaumsWorldIE
+from .echomsk import EchoMskIE
+from .ehow import EHowIE
+from .eighttracks import EightTracksIE
+from .einthusan import EinthusanIE
+from .eitb import EitbIE
+from .ellentv import (
+    EllenTVIE,
+    EllenTVClipsIE,
+)
+from .elpais import ElPaisIE
+from .embedly import EmbedlyIE
+from .engadget import EngadgetIE
+from .eporner import EpornerIE
+from .eroprofile import EroProfileIE
+from .escapist import EscapistIE
+from .espn import ESPNIE
+from .esri import EsriVideoIE
+from .europa import EuropaIE
+from .everyonesmixtape import EveryonesMixtapeIE
+from .exfm import ExfmIE
+from .expotv import ExpoTVIE
+from .extremetube import ExtremeTubeIE
+from .facebook import FacebookIE
+from .faz import FazIE
+from .fc2 import FC2IE
+from .fczenit import FczenitIE
+from .firstpost import FirstpostIE
+from .firsttv import FirstTVIE
+from .fivemin import FiveMinIE
+from .fivetv import FiveTVIE
+from .fktv import FKTVIE
+from .flickr import FlickrIE
+from .folketinget import FolketingetIE
+from .footyroom import FootyRoomIE
+from .fourtube import FourTubeIE
+from .fox import FOXIE
+from .foxgay import FoxgayIE
+from .foxnews import FoxNewsIE
+from .foxsports import FoxSportsIE
+from .franceculture import (
+    FranceCultureIE,
+    FranceCultureEmissionIE,
+)
+from .franceinter import FranceInterIE
+from .francetv import (
+    PluzzIE,
+    FranceTvInfoIE,
+    FranceTVIE,
+    GenerationQuoiIE,
+    CultureboxIE,
+)
+from .freesound import FreesoundIE
+from .freespeech import FreespeechIE
+from .freevideo import FreeVideoIE
+from .funimation import FunimationIE
+from .funnyordie import FunnyOrDieIE
+from .gameinformer import GameInformerIE
+from .gamekings import GamekingsIE
+from .gameone import (
+    GameOneIE,
+    GameOnePlaylistIE,
+)
+from .gamersyde import GamersydeIE
+from .gamespot import GameSpotIE
+from .gamestar import GameStarIE
+from .gametrailers import GametrailersIE
+from .gazeta import GazetaIE
+from .gdcvault import GDCVaultIE
+from .generic import GenericIE
+from .gfycat import GfycatIE
+from .giantbomb import GiantBombIE
+from .giga import GigaIE
+from .glide import GlideIE
+from .globo import (
+    GloboIE,
+    GloboArticleIE,
+)
+from .godtube import GodTubeIE
+from .goldenmoustache import GoldenMoustacheIE
+from .golem import GolemIE
+from .googledrive import GoogleDriveIE
+from .googleplus import GooglePlusIE
+from .googlesearch import GoogleSearchIE
+from .goshgay import GoshgayIE
+from .gputechconf import GPUTechConfIE
+from .groupon import GrouponIE
+from .hark import HarkIE
+from .hbo import HBOIE
+from .hearthisat import HearThisAtIE
+from .heise import HeiseIE
+from .hellporno import HellPornoIE
+from .helsinki import HelsinkiIE
+from .hentaistigma import HentaiStigmaIE
+from .historicfilms import HistoricFilmsIE
+from .hitbox import HitboxIE, HitboxLiveIE
+from .hornbunny import HornBunnyIE
+from .hotnewhiphop import HotNewHipHopIE
+from .hotstar import HotStarIE
+from .howcast import HowcastIE
+from .howstuffworks import HowStuffWorksIE
+from .huffpost import HuffPostIE
+from .hypem import HypemIE
+from .iconosquare import IconosquareIE
+from .ign import (
+    IGNIE,
+    OneUPIE,
+    PCMagIE,
+)
+from .imdb import (
+    ImdbIE,
+    ImdbListIE
+)
+from .imgur import (
+    ImgurIE,
+    ImgurAlbumIE,
+)
+from .ina import InaIE
+from .indavideo import (
+    IndavideoIE,
+    IndavideoEmbedIE,
+)
+from .infoq import InfoQIE
+from .instagram import InstagramIE, InstagramUserIE
+from .internetvideoarchive import InternetVideoArchiveIE
+from .iprima import IPrimaIE
+from .iqiyi import IqiyiIE
+from .ir90tv import Ir90TvIE
+from .ivi import (
+    IviIE,
+    IviCompilationIE
+)
+from .ivideon import IvideonIE
+from .izlesene import IzleseneIE
+from .jeuxvideo import JeuxVideoIE
+from .jove import JoveIE
+from .jwplatform import JWPlatformIE
+from .jpopsukitv import JpopsukiIE
+from .kaltura import KalturaIE
+from .kanalplay import KanalPlayIE
+from .kankan import KankanIE
+from .karaoketv import KaraoketvIE
+from .karrierevideos import KarriereVideosIE
+from .keezmovies import KeezMoviesIE
+from .khanacademy import KhanAcademyIE
+from .kickstarter import KickStarterIE
+from .keek import KeekIE
+from .konserthusetplay import KonserthusetPlayIE
+from .kontrtube import KontrTubeIE
+from .krasview import KrasViewIE
+from .ku6 import Ku6IE
+from .kusi import KUSIIE
+from .kuwo import (
+    KuwoIE,
+    KuwoAlbumIE,
+    KuwoChartIE,
+    KuwoSingerIE,
+    KuwoCategoryIE,
+    KuwoMvIE,
+)
+from .la7 import LA7IE
+from .laola1tv import Laola1TvIE
+from .lecture2go import Lecture2GoIE
+from .lemonde import LemondeIE
+from .leeco import (
+    LeIE,
+    LePlaylistIE,
+    LetvCloudIE,
+)
+from .libsyn import LibsynIE
+from .lifenews import (
+    LifeNewsIE,
+    LifeEmbedIE,
+)
+from .limelight import (
+    LimelightMediaIE,
+    LimelightChannelIE,
+    LimelightChannelListIE,
+)
+from .liveleak import LiveLeakIE
+from .livestream import (
+    LivestreamIE,
+    LivestreamOriginalIE,
+    LivestreamShortenerIE,
+)
+from .lnkgo import LnkGoIE
+from .lovehomeporn import LoveHomePornIE
+from .lrt import LRTIE
+from .lynda import (
+    LyndaIE,
+    LyndaCourseIE
+)
+from .m6 import M6IE
+from .macgamestore import MacGameStoreIE
+from .mailru import MailRuIE
+from .makerschannel import MakersChannelIE
+from .makertv import MakerTVIE
+from .malemotion import MalemotionIE
+from .matchtv import MatchTVIE
+from .mdr import MDRIE
+from .metacafe import MetacafeIE
+from .metacritic import MetacriticIE
+from .mgoon import MgoonIE
+from .mgtv import MGTVIE
+from .minhateca import MinhatecaIE
+from .ministrygrid import MinistryGridIE
+from .minoto import MinotoIE
+from .miomio import MioMioIE
+from .mit import TechTVMITIE, MITIE, OCWMITIE
+from .mitele import MiTeleIE
+from .mixcloud import (
+    MixcloudIE,
+    MixcloudUserIE,
+    MixcloudPlaylistIE,
+    MixcloudStreamIE,
+)
+from .mlb import MLBIE
+from .mnet import MnetIE
+from .mpora import MporaIE
+from .moevideo import MoeVideoIE
+from .mofosex import MofosexIE
+from .mojvideo import MojvideoIE
+from .moniker import MonikerIE
+from .morningstar import MorningstarIE
+from .motherless import MotherlessIE
+from .motorsport import MotorsportIE
+from .movieclips import MovieClipsIE
+from .moviezine import MoviezineIE
+from .mtv import (
+    MTVIE,
+    MTVServicesEmbeddedIE,
+    MTVIggyIE,
+    MTVDEIE,
+)
+from .muenchentv import MuenchenTVIE
+from .musicplayon import MusicPlayOnIE
+from .muzu import MuzuTVIE
+from .mwave import MwaveIE
+from .myspace import MySpaceIE, MySpaceAlbumIE
+from .myspass import MySpassIE
+from .myvi import MyviIE
+from .myvideo import MyVideoIE
+from .myvidster import MyVidsterIE
+from .nationalgeographic import (
+    NationalGeographicIE,
+    NationalGeographicChannelIE,
+)
+from .naver import NaverIE
+from .nba import NBAIE
+from .nbc import (
+    CSNNEIE,
+    NBCIE,
+    NBCNewsIE,
+    NBCSportsIE,
+    NBCSportsVPlayerIE,
+    MSNBCIE,
+)
+from .ndr import (
+    NDRIE,
+    NJoyIE,
+    NDREmbedBaseIE,
+    NDREmbedIE,
+    NJoyEmbedIE,
+)
+from .ndtv import NDTVIE
+from .netzkino import NetzkinoIE
+from .nerdcubed import NerdCubedFeedIE
+from .neteasemusic import (
+    NetEaseMusicIE,
+    NetEaseMusicAlbumIE,
+    NetEaseMusicSingerIE,
+    NetEaseMusicListIE,
+    NetEaseMusicMvIE,
+    NetEaseMusicProgramIE,
+    NetEaseMusicDjRadioIE,
+)
+from .newgrounds import NewgroundsIE
+from .newstube import NewstubeIE
+from .nextmedia import (
+    NextMediaIE,
+    NextMediaActionNewsIE,
+    AppleDailyIE,
+)
+from .nextmovie import NextMovieIE
+from .nfb import NFBIE
+from .nfl import NFLIE
+from .nhl import (
+    NHLIE,
+    NHLNewsIE,
+    NHLVideocenterIE,
+)
+from .nick import NickIE
+from .niconico import NiconicoIE, NiconicoPlaylistIE
+from .ninegag import NineGagIE
+from .noco import NocoIE
+from .normalboots import NormalbootsIE
+from .nosvideo import NosVideoIE
+from .nova import NovaIE
+from .novamov import (
+    AuroraVidIE,
+    CloudTimeIE,
+    NowVideoIE,
+    VideoWeedIE,
+    WholeCloudIE,
+)
+from .nowness import (
+    NownessIE,
+    NownessPlaylistIE,
+    NownessSeriesIE,
+)
+from .nowtv import (
+    NowTVIE,
+    NowTVListIE,
+)
+from .noz import NozIE
+from .npo import (
+    NPOIE,
+    NPOLiveIE,
+    NPORadioIE,
+    NPORadioFragmentIE,
+    SchoolTVIE,
+    VPROIE,
+    WNLIE
+)
+from .npr import NprIE
+from .nrk import (
+    NRKIE,
+    NRKPlaylistIE,
+    NRKSkoleIE,
+    NRKTVIE,
+)
+from .ntvde import NTVDeIE
+from .ntvru import NTVRuIE
+from .nytimes import (
+    NYTimesIE,
+    NYTimesArticleIE,
+)
+from .nuvid import NuvidIE
+from .odnoklassniki import OdnoklassnikiIE
+from .oktoberfesttv import OktoberfestTVIE
+from .onionstudios import OnionStudiosIE
+from .ooyala import (
+    OoyalaIE,
+    OoyalaExternalIE,
+)
+from .openload import OpenloadIE
+from .ora import OraTVIE
+from .orf import (
+    ORFTVthekIE,
+    ORFOE1IE,
+    ORFFM4IE,
+    ORFIPTVIE,
+)
+from .pandoratv import PandoraTVIE
+from .parliamentliveuk import ParliamentLiveUKIE
+from .patreon import PatreonIE
+from .pbs import PBSIE
+from .people import PeopleIE
+from .periscope import PeriscopeIE
+from .philharmoniedeparis import PhilharmonieDeParisIE
+from .phoenix import PhoenixIE
+from .photobucket import PhotobucketIE
+from .pinkbike import PinkbikeIE
+from .planetaplay import PlanetaPlayIE
+from .pladform import PladformIE
+from .played import PlayedIE
+from .playfm import PlayFMIE
+from .plays import PlaysTVIE
+from .playtvak import PlaytvakIE
+from .playvid import PlayvidIE
+from .playwire import PlaywireIE
+from .pluralsight import (
+    PluralsightIE,
+    PluralsightCourseIE,
+)
+from .podomatic import PodomaticIE
+from .porn91 import Porn91IE
+from .pornhd import PornHdIE
+from .pornhub import (
+    PornHubIE,
+    PornHubPlaylistIE,
+    PornHubUserVideosIE,
+)
+from .pornotube import PornotubeIE
+from .pornovoisines import PornoVoisinesIE
+from .pornoxo import PornoXOIE
+from .presstv import PressTVIE
+from .primesharetv import PrimeShareTVIE
+from .promptfile import PromptFileIE
+from .prosiebensat1 import ProSiebenSat1IE
+from .puls4 import Puls4IE
+from .pyvideo import PyvideoIE
+from .qqmusic import (
+    QQMusicIE,
+    QQMusicSingerIE,
+    QQMusicAlbumIE,
+    QQMusicToplistIE,
+    QQMusicPlaylistIE,
+)
+from .r7 import R7IE
+from .radiode import RadioDeIE
+from .radiojavan import RadioJavanIE
+from .radiobremen import RadioBremenIE
+from .radiofrance import RadioFranceIE
+from .rai import (
+    RaiTVIE,
+    RaiIE,
+)
+from .rbmaradio import RBMARadioIE
+from .rds import RDSIE
+from .redtube import RedTubeIE
+from .regiotv import RegioTVIE
+from .restudy import RestudyIE
+from .reverbnation import ReverbNationIE
+from .revision3 import Revision3IE
+from .rice import RICEIE
+from .ringtv import RingTVIE
+from .ro220 import Ro220IE
+from .rottentomatoes import RottenTomatoesIE
+from .roxwel import RoxwelIE
+from .rtbf import RTBFIE
+from .rte import RteIE, RteRadioIE
+from .rtlnl import RtlNlIE
+from .rtl2 import RTL2IE
+from .rtp import RTPIE
+from .rts import RTSIE
+from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE
+from .rtvnh import RTVNHIE
+from .ruhd import RUHDIE
+from .ruleporn import RulePornIE
+from .rutube import (
+    RutubeIE,
+    RutubeChannelIE,
+    RutubeEmbedIE,
+    RutubeMovieIE,
+    RutubePersonIE,
+)
+from .rutv import RUTVIE
+from .ruutu import RuutuIE
+from .sandia import SandiaIE
+from .safari import (
+    SafariIE,
+    SafariApiIE,
+    SafariCourseIE,
+)
+from .sapo import SapoIE
+from .savefrom import SaveFromIE
+from .sbs import SBSIE
+from .scivee import SciVeeIE
+from .screencast import ScreencastIE
+from .screencastomatic import ScreencastOMaticIE
+from .screenjunkies import ScreenJunkiesIE
+from .screenwavemedia import ScreenwaveMediaIE, TeamFourIE
+from .senateisvp import SenateISVPIE
+from .servingsys import ServingSysIE
+from .sexu import SexuIE
+from .sexykarma import SexyKarmaIE
+from .shahid import ShahidIE
+from .shared import SharedIE
+from .sharesix import ShareSixIE
+from .sina import SinaIE
+from .skynewsarabia import (
+    SkyNewsArabiaIE,
+    SkyNewsArabiaArticleIE,
+)
+from .slideshare import SlideshareIE
+from .slutload import SlutloadIE
+from .smotri import (
+    SmotriIE,
+    SmotriCommunityIE,
+    SmotriUserIE,
+    SmotriBroadcastIE,
+)
+from .snagfilms import (
+    SnagFilmsIE,
+    SnagFilmsEmbedIE,
+)
+from .snotr import SnotrIE
+from .sohu import SohuIE
+from .soundcloud import (
+    SoundcloudIE,
+    SoundcloudSetIE,
+    SoundcloudUserIE,
+    SoundcloudPlaylistIE,
+    SoundcloudSearchIE
+)
+from .soundgasm import (
+    SoundgasmIE,
+    SoundgasmProfileIE
+)
+from .southpark import (
+    SouthParkIE,
+    SouthParkDeIE,
+    SouthParkDkIE,
+    SouthParkEsIE,
+    SouthParkNlIE
+)
+from .spankbang import SpankBangIE
+from .spankwire import SpankwireIE
+from .spiegel import SpiegelIE, SpiegelArticleIE
+from .spiegeltv import SpiegeltvIE
+from .spike import SpikeIE
+from .stitcher import StitcherIE
+from .sport5 import Sport5IE
+from .sportbox import (
+    SportBoxIE,
+    SportBoxEmbedIE,
+)
+from .sportdeutschland import SportDeutschlandIE
+from .srgssr import (
+    SRGSSRIE,
+    SRGSSRPlayIE,
+)
+from .srmediathek import SRMediathekIE
+from .ssa import SSAIE
+from .stanfordoc import StanfordOpenClassroomIE
+from .steam import SteamIE
+from .streamcloud import StreamcloudIE
+from .streamcz import StreamCZIE
+from .streetvoice import StreetVoiceIE
+from .sunporno import SunPornoIE
+from .svt import (
+    SVTIE,
+    SVTPlayIE,
+)
+from .swrmediathek import SWRMediathekIE
+from .syfy import SyfyIE
+from .sztvhu import SztvHuIE
+from .tagesschau import TagesschauIE
+from .tapely import TapelyIE
+from .tass import TassIE
+from .tdslifeway import TDSLifewayIE
+from .teachertube import (
+    TeacherTubeIE,
+    TeacherTubeUserIE,
+)
+from .teachingchannel import TeachingChannelIE
+from .teamcoco import TeamcocoIE
+from .techtalks import TechTalksIE
+from .ted import TEDIE
+from .tele13 import Tele13IE
+from .telebruxelles import TeleBruxellesIE
+from .telecinco import TelecincoIE
+from .telegraaf import TelegraafIE
+from .telemb import TeleMBIE
+from .teletask import TeleTaskIE
+from .testurl import TestURLIE
+from .tf1 import TF1IE
+from .theintercept import TheInterceptIE
+from .theplatform import (
+    ThePlatformIE,
+    ThePlatformFeedIE,
+)
+from .thescene import TheSceneIE
+from .thesixtyone import TheSixtyOneIE
+from .thestar import TheStarIE
+from .thisamericanlife import ThisAmericanLifeIE
+from .thisav import ThisAVIE
+from .tinypic import TinyPicIE
+from .tlc import TlcDeIE
+from .tmz import (
+    TMZIE,
+    TMZArticleIE,
+)
+from .tnaflix import (
+    TNAFlixNetworkEmbedIE,
+    TNAFlixIE,
+    EMPFlixIE,
+    MovieFapIE,
+)
+from .toggle import ToggleIE
+from .thvideo import (
+    THVideoIE,
+    THVideoPlaylistIE
+)
+from .toutv import TouTvIE
+from .toypics import ToypicsUserIE, ToypicsIE
+from .traileraddict import TrailerAddictIE
+from .trilulilu import TriluliluIE
+from .trollvids import TrollvidsIE
+from .trutube import TruTubeIE
+from .tube8 import Tube8IE
+from .tubitv import TubiTvIE
+from .tudou import (
+    TudouIE,
+    TudouPlaylistIE,
+    TudouAlbumIE,
+)
+from .tumblr import TumblrIE
+from .tunein import (
+    TuneInClipIE,
+    TuneInStationIE,
+    TuneInProgramIE,
+    TuneInTopicIE,
+    TuneInShortenerIE,
+)
+from .turbo import TurboIE
+from .tutv import TutvIE
+from .tv2 import (
+    TV2IE,
+    TV2ArticleIE,
+)
+from .tv3 import TV3IE
+from .tv4 import TV4IE
+from .tvc import (
+    TVCIE,
+    TVCArticleIE,
+)
+from .tvigle import TvigleIE
+from .tvland import TVLandIE
+from .tvp import TvpIE, TvpSeriesIE
+from .tvplay import TVPlayIE
+from .tweakers import TweakersIE
+from .twentyfourvideo import TwentyFourVideoIE
+from .twentymin import TwentyMinutenIE
+from .twentytwotracks import (
+    TwentyTwoTracksIE,
+    TwentyTwoTracksGenreIE
+)
+from .twitch import (
+    TwitchVideoIE,
+    TwitchChapterIE,
+    TwitchVodIE,
+    TwitchProfileIE,
+    TwitchPastBroadcastsIE,
+    TwitchBookmarksIE,
+    TwitchStreamIE,
+)
+from .twitter import (
+    TwitterCardIE,
+    TwitterIE,
+    TwitterAmplifyIE,
+)
+from .udemy import (
+    UdemyIE,
+    UdemyCourseIE
+)
+from .udn import UDNEmbedIE
+from .digiteka import DigitekaIE
+from .unistra import UnistraIE
+from .urort import UrortIE
+from .usatoday import USATodayIE
+from .ustream import UstreamIE, UstreamChannelIE
+from .ustudio import UstudioIE
+from .varzesh3 import Varzesh3IE
+from .vbox7 import Vbox7IE
+from .veehd import VeeHDIE
+from .veoh import VeohIE
+from .vessel import VesselIE
+from .vesti import VestiIE
+from .vevo import VevoIE
+from .vgtv import (
+    BTArticleIE,
+    BTVestlendingenIE,
+    VGTVIE,
+)
+from .vh1 import VH1IE
+from .vice import (
+    ViceIE,
+    ViceShowIE,
+)
+from .viddler import ViddlerIE
+from .videodetective import VideoDetectiveIE
+from .videofyme import VideofyMeIE
+from .videomega import VideoMegaIE
+from .videomore import (
+    VideomoreIE,
+    VideomoreVideoIE,
+    VideomoreSeasonIE,
+)
+from .videopremium import VideoPremiumIE
+from .videott import VideoTtIE
+from .vidme import (
+    VidmeIE,
+    VidmeUserIE,
+    VidmeUserLikesIE,
+)
+from .vidzi import VidziIE
+from .vier import VierIE, VierVideosIE
+from .viewster import ViewsterIE
+from .viidea import ViideaIE
+from .vimeo import (
+    VimeoIE,
+    VimeoAlbumIE,
+    VimeoChannelIE,
+    VimeoGroupsIE,
+    VimeoLikesIE,
+    VimeoOndemandIE,
+    VimeoReviewIE,
+    VimeoUserIE,
+    VimeoWatchLaterIE,
+)
+from .vimple import VimpleIE
+from .vine import (
+    VineIE,
+    VineUserIE,
+)
+from .viki import (
+    VikiIE,
+    VikiChannelIE,
+)
+from .vk import (
+    VKIE,
+    VKUserVideosIE,
+)
+from .vlive import VLiveIE
+from .vodlocker import VodlockerIE
+from .voicerepublic import VoiceRepublicIE
+from .voxmedia import VoxMediaIE
+from .vporn import VpornIE
+from .vrt import VRTIE
+from .vube import VubeIE
+from .vuclip import VuClipIE
+from .vulture import VultureIE
+from .walla import WallaIE
+from .washingtonpost import WashingtonPostIE
+from .wat import WatIE
+from .wdr import (
+    WDRIE,
+    WDRMobileIE,
+    WDRMausIE,
+)
+from .webofstories import (
+    WebOfStoriesIE,
+    WebOfStoriesPlaylistIE,
+)
+from .weibo import WeiboIE
+from .weiqitv import WeiqiTVIE
+from .wimp import WimpIE
+from .wistia import WistiaIE
+from .worldstarhiphop import WorldStarHipHopIE
+from .wrzuta import WrzutaIE
+from .wsj import WSJIE
+from .xbef import XBefIE
+from .xboxclips import XboxClipsIE
+from .xfileshare import XFileShareIE
+from .xhamster import (
+    XHamsterIE,
+    XHamsterEmbedIE,
+)
+from .xminus import XMinusIE
+from .xnxx import XNXXIE
+from .xstream import XstreamIE
+from .xtube import XTubeUserIE, XTubeIE
+from .xuite import XuiteIE
+from .xvideos import XVideosIE
+from .xxxymovies import XXXYMoviesIE
+from .yahoo import (
+    YahooIE,
+    YahooSearchIE,
+)
+from .yam import YamIE
+from .yandexmusic import (
+    YandexMusicTrackIE,
+    YandexMusicAlbumIE,
+    YandexMusicPlaylistIE,
+)
+from .yesjapan import YesJapanIE
+from .yinyuetai import YinYueTaiIE
+from .ynet import YnetIE
+from .youjizz import YouJizzIE
+from .youku import YoukuIE
+from .youporn import YouPornIE
+from .yourupload import YourUploadIE
+from .youtube import (
+    YoutubeIE,
+    YoutubeChannelIE,
+    YoutubeFavouritesIE,
+    YoutubeHistoryIE,
+    YoutubeLiveIE,
+    YoutubePlaylistIE,
+    YoutubePlaylistsIE,
+    YoutubeRecommendedIE,
+    YoutubeSearchDateIE,
+    YoutubeSearchIE,
+    YoutubeSearchURLIE,
+    YoutubeShowIE,
+    YoutubeSubscriptionsIE,
+    YoutubeTruncatedIDIE,
+    YoutubeTruncatedURLIE,
+    YoutubeUserIE,
+    YoutubeWatchLaterIE,
+)
+from .zapiks import ZapiksIE
+from .zdf import ZDFIE, ZDFChannelIE
+from .zingmp3 import (
+    ZingMp3SongIE,
+    ZingMp3AlbumIE,
+)
+from .zippcast import ZippCastIE
index 98b165143fe8b3f3e970ad602856b4266c59701c..88bca100763337011a444369543a1479296e097e 100644 (file)
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
-from ..utils import int_or_none
+from ..compat import compat_xpath
+from ..utils import (
+    int_or_none,
+    qualities,
+    unified_strdate,
+    xpath_attr,
+    xpath_element,
+    xpath_text,
+    xpath_with_ns,
+)
 
 
 class FirstTVIE(InfoExtractor):
     IE_NAME = '1tv'
     IE_DESC = 'Первый канал'
-    _VALID_URL = r'https?://(?:www\.)?1tv\.ru/(?:[^/]+/)+(?P<id>.+)'
+    _VALID_URL = r'https?://(?:www\.)?1tv\.ru/(?:[^/]+/)+p?(?P<id>\d+)'
 
     _TESTS = [{
-        'url': 'http://www.1tv.ru/videoarchive/73390',
-        'md5': '777f525feeec4806130f4f764bc18a4f',
+        # single format via video_materials.json API
+        'url': 'http://www.1tv.ru/prj/inprivate/vypusk/35930',
+        'md5': '82a2777648acae812d58b3f5bd42882b',
         'info_dict': {
-            'id': '73390',
+            'id': '35930',
             'ext': 'mp4',
-            'title': 'Олимпийские канатные дороги',
-            'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
+            'title': 'Гость Людмила Сенчина. Наедине со всеми. Выпуск от 12.02.2015',
+            'description': 'md5:357933adeede13b202c7c21f91b871b2',
             'thumbnail': 're:^https?://.*\.(?:jpg|JPG)$',
-            'duration': 149,
-            'like_count': int,
-            'dislike_count': int,
+            'upload_date': '20150212',
+            'duration': 2694,
         },
-        'skip': 'Only works from Russia',
     }, {
-        'url': 'http://www.1tv.ru/prj/inprivate/vypusk/35930',
-        'md5': 'a1b6b60d530ebcf8daacf4565762bbaf',
+        # multiple formats via video_materials.json API
+        'url': 'http://www.1tv.ru/video_archive/projects/dobroeutro/p113641',
         'info_dict': {
-            'id': '35930',
+            'id': '113641',
             'ext': 'mp4',
-            'title': 'Наедине со всеми. Людмила Сенчина',
-            'description': 'md5:89553aed1d641416001fe8d450f06cb9',
+            'title': 'Весенняя аллергия. Доброе утро. Фрагмент выпуска от 07.04.2016',
+            'description': 'md5:8dcebb3dded0ff20fade39087fd1fee2',
             'thumbnail': 're:^https?://.*\.(?:jpg|JPG)$',
-            'duration': 2694,
+            'upload_date': '20160407',
+            'duration': 179,
+            'formats': 'mincount:3',
+        },
+        'params': {
+            'skip_download': True,
         },
-        'skip': 'Only works from Russia',
+    }, {
+        # single format only available via ONE_ONLINE_VIDEOS.archive_single_xml API
+        'url': 'http://www.1tv.ru/video_archive/series/f7552/p47038',
+        'md5': '519d306c5b5669761fd8906c39dbee23',
+        'info_dict': {
+            'id': '47038',
+            'ext': 'mp4',
+            'title': '"Побег". Второй сезон. 3 серия',
+            'description': 'md5:3abf8f6b9bce88201c33e9a3d794a00b',
+            'thumbnail': 're:^https?://.*\.(?:jpg|JPG)$',
+            'upload_date': '20120516',
+            'duration': 3080,
+        },
+    }, {
+        'url': 'http://www.1tv.ru/videoarchive/9967',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
-        webpage = self._download_webpage(url, video_id, 'Downloading page')
+        # Videos with multiple formats only available via this API
+        video = self._download_json(
+            'http://www.1tv.ru/video_materials.json?legacy_id=%s' % video_id,
+            video_id, fatal=False)
 
-        video_url = self._html_search_regex(
-            r'''(?s)(?:jwplayer\('flashvideoportal_1'\)\.setup\({|var\s+playlistObj\s*=).*?'file'\s*:\s*'([^']+)'.*?}\);''',
-            webpage, 'video URL')
+        description, thumbnail, upload_date, duration = [None] * 4
 
-        title = self._html_search_regex(
-            [r'<div class="tv_translation">\s*<h1><a href="[^"]+">([^<]*)</a>',
-             r"'title'\s*:\s*'([^']+)'"], webpage, 'title')
-        description = self._html_search_regex(
-            r'<div class="descr">\s*<div>&nbsp;</div>\s*<p>([^<]*)</p></div>',
-            webpage, 'description', default=None) or self._html_search_meta(
-                'description', webpage, 'description')
+        if video:
+            item = video[0]
+            title = item['title']
+            quality = qualities(('ld', 'sd', 'hd', ))
+            formats = [{
+                'url': f['src'],
+                'format_id': f.get('name'),
+                'quality': quality(f.get('name')),
+            } for f in item['mbr'] if f.get('src')]
+            thumbnail = item.get('poster')
+        else:
+            # Some videos are not available via video_materials.json
+            video = self._download_xml(
+                'http://www.1tv.ru/owa/win/ONE_ONLINE_VIDEOS.archive_single_xml?pid=%s' % video_id,
+                video_id)
+
+            NS_MAP = {
+                'media': 'http://search.yahoo.com/mrss/',
+            }
 
-        thumbnail = self._og_search_thumbnail(webpage)
-        duration = self._og_search_property(
-            'video:duration', webpage,
-            'video duration', fatal=False)
+            item = xpath_element(video, './channel/item', fatal=True)
+            title = xpath_text(item, './title', fatal=True)
+            formats = [{
+                'url': content.attrib['url'],
+            } for content in item.findall(
+                compat_xpath(xpath_with_ns('./media:content', NS_MAP))) if content.attrib.get('url')]
+            thumbnail = xpath_attr(
+                item, xpath_with_ns('./media:thumbnail', NS_MAP), 'url')
 
-        like_count = self._html_search_regex(
-            r'title="Понравилось".*?/></label> \[(\d+)\]',
-            webpage, 'like count', default=None)
-        dislike_count = self._html_search_regex(
-            r'title="Не понравилось".*?/></label> \[(\d+)\]',
-            webpage, 'dislike count', default=None)
+        self._sort_formats(formats)
+
+        webpage = self._download_webpage(url, video_id, 'Downloading page', fatal=False)
+        if webpage:
+            title = self._html_search_regex(
+                (r'<div class="tv_translation">\s*<h1><a href="[^"]+">([^<]*)</a>',
+                 r"'title'\s*:\s*'([^']+)'"),
+                webpage, 'title', default=None) or title
+            description = self._html_search_regex(
+                r'<div class="descr">\s*<div>&nbsp;</div>\s*<p>([^<]*)</p></div>',
+                webpage, 'description', default=None) or self._html_search_meta(
+                'description', webpage, 'description')
+            thumbnail = thumbnail or self._og_search_thumbnail(webpage)
+            duration = int_or_none(self._html_search_meta(
+                'video:duration', webpage, 'video duration', fatal=False))
+            upload_date = unified_strdate(self._html_search_meta(
+                'ya:ovs:upload_date', webpage, 'upload date', fatal=False))
 
         return {
             'id': video_id,
-            'url': video_url,
             'thumbnail': thumbnail,
             'title': title,
             'description': description,
+            'upload_date': upload_date,
             'duration': int_or_none(duration),
-            'like_count': int_or_none(like_count),
-            'dislike_count': int_or_none(dislike_count),
+            'formats': formats
         }
index 4c4a87e2a3337bfb5de955a329c7edba2c338ad2..8c5ffc9e84cec305e9fc813a6366b360b7e36230 100644 (file)
@@ -46,8 +46,8 @@ class FunnyOrDieIE(InfoExtractor):
         links.sort(key=lambda link: 1 if link[1] == 'mp4' else 0)
 
         m3u8_url = self._search_regex(
-            r'<source[^>]+src=(["\'])(?P<url>.+?/master\.m3u8)\1',
-            webpage, 'm3u8 url', default=None, group='url')
+            r'<source[^>]+src=(["\'])(?P<url>.+?/master\.m3u8[^"\']*)\1',
+            webpage, 'm3u8 url', group='url')
 
         formats = []
 
index ea32b621c390c390e22ebf8a6010304466700a4a..ba1c15414d2266c5bef581b9a5950f6d23bfa5bb 100644 (file)
@@ -7,7 +7,7 @@ from .common import InfoExtractor
 
 
 class GazetaIE(InfoExtractor):
-    _VALID_URL = r'(?P<url>https?://(?:www\.)?gazeta\.ru/(?:[^/]+/)?video/(?:(?:main|\d{4}/\d{2}/\d{2})/)?(?P<id>[A-Za-z0-9-_.]+)\.s?html)'
+    _VALID_URL = r'(?P<url>https?://(?:www\.)?gazeta\.ru/(?:[^/]+/)?video/(?:main/)*(?:\d{4}/\d{2}/\d{2}/)?(?P<id>[A-Za-z0-9-_.]+)\.s?html)'
     _TESTS = [{
         'url': 'http://www.gazeta.ru/video/main/zadaite_vopros_vladislavu_yurevichu.shtml',
         'md5': 'd49c9bdc6e5a7888f27475dc215ee789',
@@ -18,9 +18,22 @@ class GazetaIE(InfoExtractor):
             'description': 'md5:38617526050bd17b234728e7f9620a71',
             'thumbnail': 're:^https?://.*\.jpg',
         },
+        'skip': 'video not found',
     }, {
         'url': 'http://www.gazeta.ru/lifestyle/video/2015/03/08/master-klass_krasivoi_byt._delaem_vesennii_makiyazh.shtml',
         'only_matching': True,
+    }, {
+        'url': 'http://www.gazeta.ru/video/main/main/2015/06/22/platit_ili_ne_platit_po_isku_yukosa.shtml',
+        'info_dict': {
+            'id': '252048',
+            'ext': 'mp4',
+            'title': '"Если по иску ЮКОСа придется платить, это будет большой удар по бюджету"',
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
+        'add_ie': ['EaglePlatform'],
     }]
 
     def _real_extract(self, url):
index 59ed4c38f654f75c2217ce04e1f350295c871de7..5d45faf859dcbf14cf361ab36758dc9eee851063 100644 (file)
@@ -4,7 +4,6 @@ import re
 
 from .common import InfoExtractor
 from ..utils import (
-    remove_end,
     HEADRequest,
     sanitized_Request,
     urlencode_postdata,
@@ -51,63 +50,33 @@ class GDCVaultIE(InfoExtractor):
         {
             'url': 'http://gdcvault.com/play/1020791/',
             'only_matching': True,
-        }
+        },
+        {
+            # Hard-coded hostname
+            'url': 'http://gdcvault.com/play/1023460/Tenacious-Design-and-The-Interface',
+            'md5': 'a8efb6c31ed06ca8739294960b2dbabd',
+            'info_dict': {
+                'id': '1023460',
+                'ext': 'mp4',
+                'display_id': 'Tenacious-Design-and-The-Interface',
+                'title': 'Tenacious Design and The Interface of \'Destiny\'',
+            },
+        },
+        {
+            # Multiple audios
+            'url': 'http://www.gdcvault.com/play/1014631/Classic-Game-Postmortem-PAC',
+            'info_dict': {
+                'id': '1014631',
+                'ext': 'flv',
+                'title': 'How to Create a Good Game - From My Experience of Designing Pac-Man',
+            },
+            'params': {
+                'skip_download': True,  # Requires rtmpdump
+                'format': 'jp',  # The japanese audio
+            }
+        },
     ]
 
-    def _parse_mp4(self, xml_description):
-        video_formats = []
-        mp4_video = xml_description.find('./metadata/mp4video')
-        if mp4_video is None:
-            return None
-
-        mobj = re.match(r'(?P<root>https?://.*?/).*', mp4_video.text)
-        video_root = mobj.group('root')
-        formats = xml_description.findall('./metadata/MBRVideos/MBRVideo')
-        for format in formats:
-            mobj = re.match(r'mp4\:(?P<path>.*)', format.find('streamName').text)
-            url = video_root + mobj.group('path')
-            vbr = format.find('bitrate').text
-            video_formats.append({
-                'url': url,
-                'vbr': int(vbr),
-            })
-        return video_formats
-
-    def _parse_flv(self, xml_description):
-        formats = []
-        akamai_url = xml_description.find('./metadata/akamaiHost').text
-        audios = xml_description.find('./metadata/audios')
-        if audios is not None:
-            for audio in audios:
-                formats.append({
-                    'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url,
-                    'play_path': remove_end(audio.get('url'), '.flv'),
-                    'ext': 'flv',
-                    'vcodec': 'none',
-                    'format_id': audio.get('code'),
-                })
-        slide_video_path = xml_description.find('./metadata/slideVideo').text
-        formats.append({
-            'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url,
-            'play_path': remove_end(slide_video_path, '.flv'),
-            'ext': 'flv',
-            'format_note': 'slide deck video',
-            'quality': -2,
-            'preference': -2,
-            'format_id': 'slides',
-        })
-        speaker_video_path = xml_description.find('./metadata/speakerVideo').text
-        formats.append({
-            'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url,
-            'play_path': remove_end(speaker_video_path, '.flv'),
-            'ext': 'flv',
-            'format_note': 'speaker video',
-            'quality': -1,
-            'preference': -1,
-            'format_id': 'speaker',
-        })
-        return formats
-
     def _login(self, webpage_url, display_id):
         (username, password) = self._get_login_info()
         if username is None or password is None:
@@ -159,9 +128,10 @@ class GDCVaultIE(InfoExtractor):
                 'title': title,
             }
 
+        PLAYER_REGEX = r'<iframe src="(?P<xml_root>.+?)/player.*?\.html.*?".*?</iframe>'
+
         xml_root = self._html_search_regex(
-            r'<iframe src="(?P<xml_root>.*?)player.html.*?".*?</iframe>',
-            start_page, 'xml root', default=None)
+            PLAYER_REGEX, start_page, 'xml root', default=None)
         if xml_root is None:
             # Probably need to authenticate
             login_res = self._login(webpage_url, display_id)
@@ -171,27 +141,21 @@ class GDCVaultIE(InfoExtractor):
                 start_page = login_res
                 # Grab the url from the authenticated page
                 xml_root = self._html_search_regex(
-                    r'<iframe src="(.*?)player.html.*?".*?</iframe>',
-                    start_page, 'xml root')
+                    PLAYER_REGEX, start_page, 'xml root')
 
         xml_name = self._html_search_regex(
             r'<iframe src=".*?\?xml=(.+?\.xml).*?".*?</iframe>',
             start_page, 'xml filename', default=None)
         if xml_name is None:
             # Fallback to the older format
-            xml_name = self._html_search_regex(r'<iframe src=".*?\?xmlURL=xml/(?P<xml_file>.+?\.xml).*?".*?</iframe>', start_page, 'xml filename')
-
-        xml_description_url = xml_root + 'xml/' + xml_name
-        xml_description = self._download_xml(xml_description_url, display_id)
-
-        video_title = xml_description.find('./metadata/title').text
-        video_formats = self._parse_mp4(xml_description)
-        if video_formats is None:
-            video_formats = self._parse_flv(xml_description)
+            xml_name = self._html_search_regex(
+                r'<iframe src=".*?\?xmlURL=xml/(?P<xml_file>.+?\.xml).*?".*?</iframe>',
+                start_page, 'xml filename')
 
         return {
+            '_type': 'url_transparent',
             'id': video_id,
             'display_id': display_id,
-            'title': video_title,
-            'formats': video_formats,
+            'url': '%s/xml/%s' % (xml_root, xml_name),
+            'ie': 'DigitalSpeaking',
         }
index 589d1e1525a67f92e757d8604d35a095ec397412..95d23325900e8ed0d61bcec5d09ff3ba3d5e7a82 100644 (file)
@@ -60,6 +60,7 @@ from .googledrive import GoogleDriveIE
 from .jwplatform import JWPlatformIE
 from .digiteka import DigitekaIE
 from .instagram import InstagramIE
+from .liveleak import LiveLeakIE
 
 
 class GenericIE(InfoExtractor):
@@ -104,7 +105,8 @@ class GenericIE(InfoExtractor):
                 'skip_download': True,  # infinite live stream
             },
             'expected_warnings': [
-                r'501.*Not Implemented'
+                r'501.*Not Implemented',
+                r'400.*Bad Request',
             ],
         },
         # Direct link with incorrect MIME type
@@ -1128,6 +1130,30 @@ class GenericIE(InfoExtractor):
                 'skip_download': True,
             },
         },
+        # Another form of arte.tv embed
+        {
+            'url': 'http://www.tv-replay.fr/redirection/09-04-16/arte-reportage-arte-11508975.html',
+            'md5': '850bfe45417ddf221288c88a0cffe2e2',
+            'info_dict': {
+                'id': '030273-562_PLUS7-F',
+                'ext': 'mp4',
+                'title': 'ARTE Reportage - Nulle part, en France',
+                'description': 'md5:e3a0e8868ed7303ed509b9e3af2b870d',
+                'upload_date': '20160409',
+            },
+        },
+        # LiveLeak embed
+        {
+            'url': 'http://www.wykop.pl/link/3088787/',
+            'md5': 'ace83b9ed19b21f68e1b50e844fdf95d',
+            'info_dict': {
+                'id': '874_1459135191',
+                'ext': 'mp4',
+                'title': 'Man shows poor quality of new apartment building',
+                'description': 'The wall is like a sand pile.',
+                'uploader': 'Lake8737',
+            }
+        },
     ]
 
     def report_following_redirect(self, new_url):
@@ -1702,7 +1728,7 @@ class GenericIE(InfoExtractor):
 
         # Look for embedded arte.tv player
         mobj = re.search(
-            r'<script [^>]*?src="(?P<url>http://www\.arte\.tv/playerv2/embed[^"]+)"',
+            r'<(?:script|iframe) [^>]*?src="(?P<url>http://www\.arte\.tv/(?:playerv2/embed|arte_vp/index)[^"]+)"',
             webpage)
         if mobj is not None:
             return self.url_result(mobj.group('url'), 'ArteTVEmbed')
@@ -1930,7 +1956,13 @@ class GenericIE(InfoExtractor):
         # Look for Instagram embeds
         instagram_embed_url = InstagramIE._extract_embed_url(webpage)
         if instagram_embed_url is not None:
-            return self.url_result(instagram_embed_url, InstagramIE.ie_key())
+            return self.url_result(
+                self._proto_relative_url(instagram_embed_url), InstagramIE.ie_key())
+
+        # Look for LiveLeak embeds
+        liveleak_url = LiveLeakIE._extract_url(webpage)
+        if liveleak_url:
+            return self.url_result(liveleak_url, 'LiveLeak')
 
         def check_video(vurl):
             if YoutubeIE.suitable(vurl):
index 9561ed5fbaa25404654303956a676b000da2af67..62ff84835c87b28d18ace1afa5eee19f894d198d 100644 (file)
@@ -2,6 +2,7 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
+from ..utils import unified_strdate
 
 
 class GlideIE(InfoExtractor):
@@ -15,26 +16,38 @@ class GlideIE(InfoExtractor):
             'ext': 'mp4',
             'title': 'Damon Timm\'s Glide message',
             'thumbnail': 're:^https?://.*?\.cloudfront\.net/.*\.jpg$',
+            'uploader': 'Damon Timm',
+            'upload_date': '20140919',
         }
     }
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
+
         webpage = self._download_webpage(url, video_id)
+
         title = self._html_search_regex(
-            r'<title>(.*?)</title>', webpage, 'title')
-        video_url = self.http_scheme() + self._search_regex(
-            r'<source src="(.*?)" type="video/mp4">', webpage, 'video URL')
-        thumbnail_url = self._search_regex(
-            r'<img id="video-thumbnail" src="(.*?)"',
-            webpage, 'thumbnail url', fatal=False)
-        thumbnail = (
-            thumbnail_url if thumbnail_url is None
-            else self.http_scheme() + thumbnail_url)
+            r'<title>(.+?)</title>', webpage, 'title')
+        video_url = self._proto_relative_url(self._search_regex(
+            r'<source[^>]+src=(["\'])(?P<url>.+?)\1',
+            webpage, 'video URL', default=None,
+            group='url')) or self._og_search_video_url(webpage)
+        thumbnail = self._proto_relative_url(self._search_regex(
+            r'<img[^>]+id=["\']video-thumbnail["\'][^>]+src=(["\'])(?P<url>.+?)\1',
+            webpage, 'thumbnail url', default=None,
+            group='url')) or self._og_search_thumbnail(webpage)
+        uploader = self._search_regex(
+            r'<div[^>]+class=["\']info-name["\'][^>]*>([^<]+)',
+            webpage, 'uploader', fatal=False)
+        upload_date = unified_strdate(self._search_regex(
+            r'<div[^>]+class="info-date"[^>]*>([^<]+)',
+            webpage, 'upload date', fatal=False))
 
         return {
             'id': video_id,
             'title': title,
             'url': video_url,
             'thumbnail': thumbnail,
+            'uploader': uploader,
+            'upload_date': upload_date,
         }
index 1d9166455aae935f1eb51777d170e0f6259ffd4e..0c015141fa322465b1476e035f87da223555b211 100644 (file)
@@ -14,13 +14,13 @@ class GoshgayIE(InfoExtractor):
     _VALID_URL = r'https?://www\.goshgay\.com/video(?P<id>\d+?)($|/)'
     _TEST = {
         'url': 'http://www.goshgay.com/video299069/diesel_sfw_xxx_video',
-        'md5': '027fcc54459dff0feb0bc06a7aeda680',
+        'md5': '4b6db9a0a333142eb9f15913142b0ed1',
         'info_dict': {
             'id': '299069',
             'ext': 'flv',
             'title': 'DIESEL SFW XXX Video',
             'thumbnail': 're:^http://.*\.jpg$',
-            'duration': 79,
+            'duration': 80,
             'age_limit': 18,
         }
     }
@@ -47,5 +47,5 @@ class GoshgayIE(InfoExtractor):
             'title': title,
             'thumbnail': thumbnail,
             'duration': duration,
-            'age_limit': self._family_friendly_search(webpage),
+            'age_limit': 18,
         }
index 145b55bf3e019277d1e8ef958aacb90015cad737..359b348e4fd7df58780bc5c5e623ed443da40553 100644 (file)
@@ -2,12 +2,6 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
-from ..utils import (
-    xpath_element,
-    xpath_text,
-    int_or_none,
-    parse_duration,
-)
 
 
 class GPUTechConfIE(InfoExtractor):
@@ -27,29 +21,15 @@ class GPUTechConfIE(InfoExtractor):
         video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
 
-        root_path = self._search_regex(r'var\s+rootPath\s*=\s*"([^"]+)', webpage, 'root path', 'http://evt.dispeak.com/nvidia/events/gtc15/')
-        xml_file_id = self._search_regex(r'var\s+xmlFileId\s*=\s*"([^"]+)', webpage, 'xml file id')
-
-        doc = self._download_xml('%sxml/%s.xml' % (root_path, xml_file_id), video_id)
-
-        metadata = xpath_element(doc, 'metadata')
-        http_host = xpath_text(metadata, 'httpHost', 'http host', True)
-        mbr_videos = xpath_element(metadata, 'MBRVideos')
-
-        formats = []
-        for mbr_video in mbr_videos.findall('MBRVideo'):
-            stream_name = xpath_text(mbr_video, 'streamName')
-            if stream_name:
-                formats.append({
-                    'url': 'http://%s/%s' % (http_host, stream_name.replace('mp4:', '')),
-                    'tbr': int_or_none(xpath_text(mbr_video, 'bitrate')),
-                })
-        self._sort_formats(formats)
+        root_path = self._search_regex(
+            r'var\s+rootPath\s*=\s*"([^"]+)', webpage, 'root path',
+            default='http://evt.dispeak.com/nvidia/events/gtc15/')
+        xml_file_id = self._search_regex(
+            r'var\s+xmlFileId\s*=\s*"([^"]+)', webpage, 'xml file id')
 
         return {
+            '_type': 'url_transparent',
             'id': video_id,
-            'title': xpath_text(metadata, 'title'),
-            'duration': parse_duration(xpath_text(metadata, 'endTime')),
-            'creator': xpath_text(metadata, 'speaker'),
-            'formats': formats,
+            'url': '%sxml/%s.xml' % (root_path, xml_file_id),
+            'ie': 'DigitalSpeaking',
         }
index 63c05b6a6f96dfa4437f15cd77524ab3d89e1018..f6b69662baf547aa48a9bdf460671f072bd59884 100644 (file)
@@ -16,14 +16,14 @@ class GrouponIE(InfoExtractor):
         'playlist': [{
             'info_dict': {
                 'id': 'tubGNycTo_9Uxg82uESj4i61EYX8nyuf',
-                'ext': 'mp4',
+                'ext': 'flv',
                 'title': 'Bikram Yoga Huntington Beach | Orange County',
                 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
                 'duration': 44.961,
             },
         }],
         'params': {
-            'skip_download': 'HLS',
+            'skip_download': 'HDS',
         }
     }
 
@@ -32,7 +32,7 @@ class GrouponIE(InfoExtractor):
         webpage = self._download_webpage(url, playlist_id)
 
         payload = self._parse_json(self._search_regex(
-            r'var\s+payload\s*=\s*(.*?);\n', webpage, 'payload'), playlist_id)
+            r'(?:var\s+|window\.)payload\s*=\s*(.*?);\n', webpage, 'payload'), playlist_id)
         videos = payload['carousel'].get('dealVideos', [])
         entries = []
         for v in videos:
index 76b74c51d673df472d25dc5fc909aa9d073f4d59..65ba2a48b069bd67d2b3382f2d87bc1160145612 100644 (file)
@@ -24,6 +24,7 @@ class HowStuffWorksIE(InfoExtractor):
                 'thumbnail': 're:^https?://.*\.jpg$',
                 'duration': 161,
             },
+            'skip': 'Video broken',
         },
         {
             'url': 'http://adventure.howstuffworks.com/7199-survival-zone-food-and-water-in-the-savanna-video.htm',
index a38eae421a9199b578b3a724d205b13e6367c67a..059073749e67605464b6159b9391f71eb5a6052d 100644 (file)
@@ -4,6 +4,7 @@ import re
 
 from .common import InfoExtractor
 from ..utils import (
+    determine_ext,
     parse_duration,
     unified_strdate,
 )
@@ -29,7 +30,12 @@ class HuffPostIE(InfoExtractor):
             'description': 'This week on Legalese It, Mike talks to David Bosco about his new book on the ICC, "Rough Justice," he also discusses the Virginia AG\'s historic stance on gay marriage, the execution of Edgar Tamayo, the ICC\'s delay of Kenya\'s President and more.  ',
             'duration': 1549,
             'upload_date': '20140124',
-        }
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
+        'expected_warnings': ['HTTP Error 404: Not Found'],
     }
 
     def _real_extract(self, url):
@@ -45,7 +51,7 @@ class HuffPostIE(InfoExtractor):
         description = data.get('description')
 
         thumbnails = []
-        for url in data['images'].values():
+        for url in filter(None, data['images'].values()):
             m = re.match('.*-([0-9]+x[0-9]+)\.', url)
             if not m:
                 continue
@@ -54,13 +60,25 @@ class HuffPostIE(InfoExtractor):
                 'resolution': m.group(1),
             })
 
-        formats = [{
-            'format': key,
-            'format_id': key.replace('/', '.'),
-            'ext': 'mp4',
-            'url': url,
-            'vcodec': 'none' if key.startswith('audio/') else None,
-        } for key, url in data.get('sources', {}).get('live', {}).items()]
+        formats = []
+        sources = data.get('sources', {})
+        live_sources = list(sources.get('live', {}).items()) + list(sources.get('live_again', {}).items())
+        for key, url in live_sources:
+            ext = determine_ext(url)
+            if ext == 'm3u8':
+                formats.extend(self._extract_m3u8_formats(
+                    url, video_id, ext='mp4', m3u8_id='hls', fatal=False))
+            elif ext == 'f4m':
+                formats.extend(self._extract_f4m_formats(
+                    url + '?hdcore=2.9.5', video_id, f4m_id='hds', fatal=False))
+            else:
+                formats.append({
+                    'format': key,
+                    'format_id': key.replace('/', '.'),
+                    'ext': 'mp4',
+                    'url': url,
+                    'vcodec': 'none' if key.startswith('audio/') else None,
+                })
 
         if not formats and data.get('fivemin_id'):
             return self.url_result('5min:%s' % data['fivemin_id'])
index 11bb58d8a66edfc6ae06adb3d7e0d692262d757d..3cbe77ad80f2fc9a03c738745524d5dac98c9d37 100644 (file)
@@ -12,7 +12,7 @@ from ..utils import (
 
 
 class InstagramIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?instagram\.com/p/(?P<id>[^/?#&]+)'
+    _VALID_URL = r'(?P<url>https?://(?:www\.)?instagram\.com/p/(?P<id>[^/?#&]+))'
     _TESTS = [{
         'url': 'https://instagram.com/p/aye83DjauH/?foo=bar#abc',
         'md5': '0d2da106a9d2631273e192b372806516',
@@ -38,10 +38,19 @@ class InstagramIE(InfoExtractor):
     }, {
         'url': 'https://instagram.com/p/-Cmh1cukG2/',
         'only_matching': True,
+    }, {
+        'url': 'http://instagram.com/p/9o6LshA7zy/embed/',
+        'only_matching': True,
     }]
 
     @staticmethod
     def _extract_embed_url(webpage):
+        mobj = re.search(
+            r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?instagram\.com/p/[^/]+/embed.*?)\1',
+            webpage)
+        if mobj:
+            return mobj.group('url')
+
         blockquote_el = get_element_by_attribute(
             'class', 'instagram-media', webpage)
         if blockquote_el is None:
@@ -53,7 +62,9 @@ class InstagramIE(InfoExtractor):
             return mobj.group('link')
 
     def _real_extract(self, url):
-        video_id = self._match_id(url)
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        url = mobj.group('url')
 
         webpage = self._download_webpage(url, video_id)
         uploader_id = self._search_regex(r'"owner":{"username":"(.+?)"',
index e60145b3dc5dc80f921c86a3b03a59cf5844b60e..45add007fd99c8bd80f16c1becfb42cf403d45d5 100644 (file)
@@ -1,93 +1,91 @@
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
 from ..compat import (
+    compat_parse_qs,
     compat_urlparse,
-    compat_urllib_parse_urlencode,
 )
 from ..utils import (
-    xpath_with_ns,
+    determine_ext,
+    int_or_none,
+    xpath_text,
 )
 
 
 class InternetVideoArchiveIE(InfoExtractor):
-    _VALID_URL = r'https?://video\.internetvideoarchive\.net/flash/players/.*?\?.*?publishedid.*?'
+    _VALID_URL = r'https?://video\.internetvideoarchive\.net/(?:player|flash/players)/.*?\?.*?publishedid.*?'
 
     _TEST = {
-        'url': 'http://video.internetvideoarchive.net/flash/players/flashconfiguration.aspx?customerid=69249&publishedid=452693&playerid=247',
+        'url': 'http://video.internetvideoarchive.net/player/6/configuration.ashx?customerid=69249&publishedid=194487&reporttag=vdbetatitle&playerid=641&autolist=0&domain=www.videodetective.com&maxrate=high&minrate=low&socialplayer=false',
         'info_dict': {
-            'id': '452693',
+            'id': '194487',
             'ext': 'mp4',
-            'title': 'SKYFALL',
-            'description': 'In SKYFALL, Bond\'s loyalty to M is tested as her past comes back to haunt her. As MI6 comes under attack, 007 must track down and destroy the threat, no matter how personal the cost.',
-            'duration': 152,
+            'title': 'KICK-ASS 2',
+            'description': 'md5:c189d5b7280400630a1d3dd17eaa8d8a',
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
         },
     }
 
     @staticmethod
-    def _build_url(query):
-        return 'http://video.internetvideoarchive.net/flash/players/flashconfiguration.aspx?' + query
+    def _build_json_url(query):
+        return 'http://video.internetvideoarchive.net/player/6/configuration.ashx?' + query
 
     @staticmethod
-    def _clean_query(query):
-        NEEDED_ARGS = ['publishedid', 'customerid']
-        query_dic = compat_urlparse.parse_qs(query)
-        cleaned_dic = dict((k, v[0]) for (k, v) in query_dic.items() if k in NEEDED_ARGS)
-        # Other player ids return m3u8 urls
-        cleaned_dic['playerid'] = '247'
-        cleaned_dic['videokbrate'] = '100000'
-        return compat_urllib_parse_urlencode(cleaned_dic)
+    def _build_xml_url(query):
+        return 'http://video.internetvideoarchive.net/flash/players/flashconfiguration.aspx?' + query
 
     def _real_extract(self, url):
         query = compat_urlparse.urlparse(url).query
-        query_dic = compat_urlparse.parse_qs(query)
+        query_dic = compat_parse_qs(query)
         video_id = query_dic['publishedid'][0]
-        url = self._build_url(query)
 
-        flashconfiguration = self._download_xml(url, video_id,
-                                                'Downloading flash configuration')
-        file_url = flashconfiguration.find('file').text
-        file_url = file_url.replace('/playlist.aspx', '/mrssplaylist.aspx')
-        # Replace some of the parameters in the query to get the best quality
-        # and http links (no m3u8 manifests)
-        file_url = re.sub(r'(?<=\?)(.+)$',
-                          lambda m: self._clean_query(m.group()),
-                          file_url)
-        info = self._download_xml(file_url, video_id,
-                                  'Downloading video info')
-        item = info.find('channel/item')
+        if '/player/' in url:
+            configuration = self._download_json(url, video_id)
+
+            # There are multiple videos in the playlist while only the first one
+            # matches the video played in browsers
+            video_info = configuration['playlist'][0]
+
+            formats = []
+            for source in video_info['sources']:
+                file_url = source['file']
+                if determine_ext(file_url) == 'm3u8':
+                    formats.extend(self._extract_m3u8_formats(
+                        file_url, video_id, ext='mp4', m3u8_id='hls'))
+                else:
+                    a_format = {
+                        'url': file_url,
+                    }
+
+                    if source.get('label') and source['label'][-4:] == ' kbs':
+                        tbr = int_or_none(source['label'][:-4])
+                        a_format.update({
+                            'tbr': tbr,
+                            'format_id': 'http-%d' % tbr,
+                        })
+                        formats.append(a_format)
 
-        def _bp(p):
-            return xpath_with_ns(
-                p,
-                {
-                    'media': 'http://search.yahoo.com/mrss/',
-                    'jwplayer': 'http://developer.longtailvideo.com/trac/wiki/FlashFormats',
-                }
-            )
-        formats = []
-        for content in item.findall(_bp('media:group/media:content')):
-            attr = content.attrib
-            f_url = attr['url']
-            width = int(attr['width'])
-            bitrate = int(attr['bitrate'])
-            format_id = '%d-%dk' % (width, bitrate)
-            formats.append({
-                'format_id': format_id,
-                'url': f_url,
-                'width': width,
-                'tbr': bitrate,
-            })
+            self._sort_formats(formats)
 
-        self._sort_formats(formats)
+            title = video_info['title']
+            description = video_info.get('description')
+            thumbnail = video_info.get('image')
+        else:
+            configuration = self._download_xml(url, video_id)
+            formats = [{
+                'url': xpath_text(configuration, './file', 'file URL', fatal=True),
+            }]
+            thumbnail = xpath_text(configuration, './image', 'thumbnail')
+            title = 'InternetVideoArchive video %s' % video_id
+            description = None
 
         return {
             'id': video_id,
-            'title': item.find('title').text,
+            'title': title,
             'formats': formats,
-            'thumbnail': item.find(_bp('media:thumbnail')).attrib['url'],
-            'description': item.find('description').text,
-            'duration': int(attr['duration']),
+            'thumbnail': thumbnail,
+            'description': description,
         }
index 88570f261c224983a7f38e6911a88005ef80923e..ea8fbb329f4bb41208b31f7744bafa665f3ba793 100644 (file)
@@ -165,7 +165,7 @@ class IqiyiIE(InfoExtractor):
     IE_NAME = 'iqiyi'
     IE_DESC = '爱奇艺'
 
-    _VALID_URL = r'https?://(?:[^.]+\.)?iqiyi\.com/.+\.html'
+    _VALID_URL = r'https?://(?:(?:[^.]+\.)?iqiyi\.com|www\.pps\.tv)/.+\.html'
 
     _NETRC_MACHINE = 'iqiyi'
 
@@ -273,6 +273,9 @@ class IqiyiIE(InfoExtractor):
             'title': '灌篮高手 国语版',
         },
         'playlist_count': 101,
+    }, {
+        'url': 'http://www.pps.tv/w_19rrbav0ph.html',
+        'only_matching': True,
     }]
 
     _FORMATS_MAP = [
index bc226fa67c064b991674a510b1eba54d40dc67e0..aa0728abc0155fa6abbe8e2a88de18dd89d85138 100644 (file)
@@ -29,7 +29,7 @@ class IzleseneIE(InfoExtractor):
                 'ext': 'mp4',
                 'title': 'Sevinçten Çıldırtan Doğum Günü Hediyesi',
                 'description': 'md5:253753e2655dde93f59f74b572454f6d',
-                'thumbnail': 're:^http://.*\.jpg',
+                'thumbnail': 're:^https?://.*\.jpg',
                 'uploader_id': 'pelikzzle',
                 'timestamp': int,
                 'upload_date': '20140702',
@@ -44,8 +44,7 @@ class IzleseneIE(InfoExtractor):
                 'id': '17997',
                 'ext': 'mp4',
                 'title': 'Tarkan Dortmund 2006 Konseri',
-                'description': 'Tarkan Dortmund 2006 Konseri',
-                'thumbnail': 're:^http://.*\.jpg',
+                'thumbnail': 're:^https://.*\.jpg',
                 'uploader_id': 'parlayankiz',
                 'timestamp': int,
                 'upload_date': '20061112',
@@ -62,7 +61,7 @@ class IzleseneIE(InfoExtractor):
         webpage = self._download_webpage(url, video_id)
 
         title = self._og_search_title(webpage)
-        description = self._og_search_description(webpage)
+        description = self._og_search_description(webpage, default=None)
         thumbnail = self._proto_relative_url(
             self._og_search_thumbnail(webpage), scheme='http:')
 
diff --git a/youtube_dl/extractor/jadorecettepub.py b/youtube_dl/extractor/jadorecettepub.py
deleted file mode 100644 (file)
index 158c09a..0000000
+++ /dev/null
@@ -1,47 +0,0 @@
-# coding: utf-8
-
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from .youtube import YoutubeIE
-
-
-class JadoreCettePubIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?jadorecettepub\.com/[0-9]{4}/[0-9]{2}/(?P<id>.*?)\.html'
-
-    _TEST = {
-        'url': 'http://www.jadorecettepub.com/2010/12/star-wars-massacre-par-les-japonais.html',
-        'md5': '401286a06067c70b44076044b66515de',
-        'info_dict': {
-            'id': 'jLMja3tr7a4',
-            'ext': 'mp4',
-            'title': 'La pire utilisation de Star Wars',
-            'description': "Jadorecettepub.com vous a gratifié de plusieurs pubs géniales utilisant Star Wars et Dark Vador plus particulièrement... Mais l'heure est venue de vous proposer une version totalement massacrée, venue du Japon.  Quand les Japonais détruisent l'image de Star Wars pour vendre du thon en boite, ça promet...",
-        },
-    }
-
-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        display_id = mobj.group('id')
-
-        webpage = self._download_webpage(url, display_id)
-
-        title = self._html_search_regex(
-            r'<span style="font-size: x-large;"><b>(.*?)</b></span>',
-            webpage, 'title')
-        description = self._html_search_regex(
-            r'(?s)<div id="fb-root">(.*?)<script>', webpage, 'description',
-            fatal=False)
-        real_url = self._search_regex(
-            r'\[/postlink\](.*)endofvid', webpage, 'video URL')
-        video_id = YoutubeIE.extract_id(real_url)
-
-        return {
-            '_type': 'url_transparent',
-            'url': real_url,
-            'id': video_id,
-            'title': title,
-            'description': description,
-        }
index 6770685d7027c3738fba35f3e057f6be2a3a512c..8a5e562dbc24fac4d18498e631e8f5e10d8fe038 100644 (file)
@@ -4,16 +4,15 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..utils import int_or_none
+from ..utils import (
+    float_or_none,
+    int_or_none,
+)
 
 
 class JWPlatformBaseIE(InfoExtractor):
     def _parse_jwplayer_data(self, jwplayer_data, video_id, require_title=True):
         video_data = jwplayer_data['playlist'][0]
-        subtitles = {}
-        for track in video_data['tracks']:
-            if track['kind'] == 'captions':
-                subtitles[track['label']] = [{'url': self._proto_relative_url(track['file'])}]
 
         formats = []
         for source in video_data['sources']:
@@ -35,12 +34,22 @@ class JWPlatformBaseIE(InfoExtractor):
                 })
         self._sort_formats(formats)
 
+        subtitles = {}
+        tracks = video_data.get('tracks')
+        if tracks and isinstance(tracks, list):
+            for track in tracks:
+                if track.get('file') and track.get('kind') == 'captions':
+                    subtitles.setdefault(track.get('label') or 'en', []).append({
+                        'url': self._proto_relative_url(track['file'])
+                    })
+
         return {
             'id': video_id,
             'title': video_data['title'] if require_title else video_data.get('title'),
             'description': video_data.get('description'),
             'thumbnail': self._proto_relative_url(video_data.get('image')),
             'timestamp': int_or_none(video_data.get('pubdate')),
+            'duration': float_or_none(jwplayer_data.get('duration')),
             'subtitles': subtitles,
             'formats': formats,
         }
index b4c30b7f3145fef78ec107d402c97927f1a8ad2e..a6050c4de3e1695ac26bd1a21bab981a52755c21 100644 (file)
@@ -2,39 +2,63 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
-from ..compat import compat_urllib_parse_unquote_plus
-from ..utils import (
-    js_to_json,
-)
 
 
 class KaraoketvIE(InfoExtractor):
-    _VALID_URL = r'https?://karaoketv\.co\.il/\?container=songs&id=(?P<id>[0-9]+)'
+    _VALID_URL = r'http://www.karaoketv.co.il/[^/]+/(?P<id>\d+)'
     _TEST = {
-        'url': 'http://karaoketv.co.il/?container=songs&id=171568',
+        'url': 'http://www.karaoketv.co.il/%D7%A9%D7%99%D7%A8%D7%99_%D7%A7%D7%A8%D7%99%D7%95%D7%A7%D7%99/58356/%D7%90%D7%99%D7%96%D7%95%D7%9F',
         'info_dict': {
-            'id': '171568',
-            'ext': 'mp4',
-            'title': 'אל העולם שלך - רותם כהן - שרים קריוקי',
+            'id': '58356',
+            'ext': 'flv',
+            'title': 'קריוקי של איזון',
+        },
+        'params': {
+            # rtmp download
+            'skip_download': True,
         }
     }
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
+
         webpage = self._download_webpage(url, video_id)
+        api_page_url = self._search_regex(
+            r'<iframe[^>]+src=(["\'])(?P<url>https?://www\.karaoke\.co\.il/api_play\.php\?.+?)\1',
+            webpage, 'API play URL', group='url')
+
+        api_page = self._download_webpage(api_page_url, video_id)
+        video_cdn_url = self._search_regex(
+            r'<iframe[^>]+src=(["\'])(?P<url>https?://www\.video-cdn\.com/embed/iframe/.+?)\1',
+            api_page, 'video cdn URL', group='url')
+
+        video_cdn = self._download_webpage(video_cdn_url, video_id)
+        play_path = self._parse_json(
+            self._search_regex(
+                r'var\s+options\s*=\s*({.+?});', video_cdn, 'options'),
+            video_id)['clip']['url']
 
-        page_video_url = self._og_search_video_url(webpage, video_id)
-        config_json = compat_urllib_parse_unquote_plus(self._search_regex(
-            r'config=(.*)', page_video_url, 'configuration'))
+        settings = self._parse_json(
+            self._search_regex(
+                r'var\s+settings\s*=\s*({.+?});', video_cdn, 'servers', default='{}'),
+            video_id, fatal=False) or {}
 
-        urls_info_json = self._download_json(
-            config_json, video_id, 'Downloading configuration',
-            transform_source=js_to_json)
+        servers = settings.get('servers')
+        if not servers or not isinstance(servers, list):
+            servers = ('wowzail.video-cdn.com:80/vodcdn', )
 
-        url = urls_info_json['playlist'][0]['url']
+        formats = [{
+            'url': 'rtmp://%s' % server if not server.startswith('rtmp') else server,
+            'play_path': play_path,
+            'app': 'vodcdn',
+            'page_url': video_cdn_url,
+            'player_url': 'http://www.video-cdn.com/assets/flowplayer/flowplayer.commercial-3.2.18.swf',
+            'rtmp_real_time': True,
+            'ext': 'flv',
+        } for server in servers]
 
         return {
             'id': video_id,
             'title': self._og_search_title(webpage),
-            'url': url,
+            'formats': formats,
         }
index 2cb04e533d2e5c7caf5d3be062b9c0a51635cb1c..c05263e6165159320376939c252af7dea7aeadb2 100644 (file)
@@ -52,9 +52,12 @@ class KarriereVideosIE(InfoExtractor):
 
         video_id = self._search_regex(
             r'/config/video/(.+?)\.xml', webpage, 'video id')
+        # Server returns malformed headers
+        # Force Accept-Encoding: * to prevent gzipped results
         playlist = self._download_xml(
             'http://www.karrierevideos.at/player-playlist.xml.php?p=%s' % video_id,
-            video_id, transform_source=fix_xml_ampersands)
+            video_id, transform_source=fix_xml_ampersands,
+            headers={'Accept-Encoding': '*'})
 
         NS_MAP = {
             'jwplayer': 'http://developer.longtailvideo.com/trac/wiki/FlashFormats'
index 86c17c931f1d41f8276f5883ed2139f6348759c2..c0ece51133a441629a0272757f1a5e8157f671f6 100644 (file)
@@ -268,7 +268,7 @@ class KuwoCategoryIE(InfoExtractor):
             'title': '八十年代精选',
             'description': '这些都是属于八十年代的回忆!',
         },
-        'playlist_count': 30,
+        'playlist_count': 24,
     }
 
     def _real_extract(self, url):
index d4fbafece22cc18a3938ff32cb29dfeb162ac122..2fab38079aac0c5f20a1772d52fa52642cb520bf 100644 (file)
@@ -63,6 +63,7 @@ class Laola1TvIE(InfoExtractor):
         'params': {
             'skip_download': True,
         },
+        'skip': 'This live stream has already finished.',
     }]
 
     def _real_extract(self, url):
@@ -74,6 +75,9 @@ class Laola1TvIE(InfoExtractor):
 
         webpage = self._download_webpage(url, display_id)
 
+        if 'Dieser Livestream ist bereits beendet.' in webpage:
+            raise ExtractorError('This live stream has already finished.', expected=True)
+
         iframe_url = self._search_regex(
             r'<iframe[^>]*?id="videoplayer"[^>]*?src="([^"]+)"',
             webpage, 'iframe url')
index 40a3d23468636877cc485ac9e064ee3527a3dcb2..81b5d41be4a676e55c795fe233591913e7a691c8 100644 (file)
@@ -6,6 +6,7 @@ import re
 from .common import InfoExtractor
 from ..utils import (
     determine_ext,
+    determine_protocol,
     parse_duration,
     int_or_none,
 )
@@ -18,10 +19,14 @@ class Lecture2GoIE(InfoExtractor):
         'md5': 'ac02b570883020d208d405d5a3fd2f7f',
         'info_dict': {
             'id': '17473',
-            'ext': 'flv',
+            'ext': 'mp4',
             'title': '2 - Endliche Automaten und reguläre Sprachen',
             'creator': 'Frank Heitmann',
             'duration': 5220,
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
         }
     }
 
@@ -32,14 +37,18 @@ class Lecture2GoIE(InfoExtractor):
         title = self._html_search_regex(r'<em[^>]+class="title">(.+)</em>', webpage, 'title')
 
         formats = []
-        for url in set(re.findall(r'"src","([^"]+)"', webpage)):
+        for url in set(re.findall(r'var\s+playerUri\d+\s*=\s*"([^"]+)"', webpage)):
             ext = determine_ext(url)
+            protocol = determine_protocol({'url': url})
             if ext == 'f4m':
-                formats.extend(self._extract_f4m_formats(url, video_id))
+                formats.extend(self._extract_f4m_formats(url, video_id, f4m_id='hds'))
             elif ext == 'm3u8':
-                formats.extend(self._extract_m3u8_formats(url, video_id))
+                formats.extend(self._extract_m3u8_formats(url, video_id, ext='mp4', m3u8_id='hls'))
             else:
+                if protocol == 'rtmp':
+                    continue  # XXX: currently broken
                 formats.append({
+                    'format_id': protocol,
                     'url': url,
                 })
 
index 4684994e1726fc1785de3b8df6061f2eaf278ed8..29fba5f30b0cc4633dbc978e886c62eab0d4ac81 100644 (file)
@@ -53,6 +53,14 @@ class LiveLeakIE(InfoExtractor):
         }
     }]
 
+    @staticmethod
+    def _extract_url(webpage):
+        mobj = re.search(
+            r'<iframe[^>]+src="https?://(?:\w+\.)?liveleak\.com/ll_embed\?(?:.*?)i=(?P<id>[\w_]+)(?:.*)',
+            webpage)
+        if mobj:
+            return 'http://www.liveleak.com/view?i=%s' % mobj.group('id')
+
     def _real_extract(self, url):
         video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
index 2338e7f96f36bea7246e7357302cbcbcac39ad8a..2100583df46ab7955846f8e3b08467d13ed3440e 100644 (file)
@@ -49,8 +49,8 @@ class MDRIE(InfoExtractor):
             'ext': 'mp4',
             'title': 'Beutolomäus und der geheime Weihnachtswunsch',
             'description': 'md5:b69d32d7b2c55cbe86945ab309d39bbd',
-            'timestamp': 1419047100,
-            'upload_date': '20141220',
+            'timestamp': 1450950000,
+            'upload_date': '20151224',
             'duration': 4628,
             'uploader': 'KIKA',
         },
@@ -71,8 +71,8 @@ class MDRIE(InfoExtractor):
         webpage = self._download_webpage(url, video_id)
 
         data_url = self._search_regex(
-            r'(?:dataURL|playerXml(?:["\'])?)\s*:\s*(["\'])(?P<url>\\?/.+/(?:video|audio)-?[0-9]+-avCustom\.xml)\1',
-            webpage, 'data url', default=None, group='url').replace('\/', '/')
+            r'(?:dataURL|playerXml(?:["\'])?)\s*:\s*(["\'])(?P<url>.+/(?:video|audio)-?[0-9]+-avCustom\.xml)\1',
+            webpage, 'data url', group='url').replace('\/', '/')
 
         doc = self._download_xml(
             compat_urlparse.urljoin(url, data_url), video_id)
index e30320569805aedaa6694ae54f9086909593f7a4..444ec0310877e8377f78e88b07fd110ca9e6aa0d 100644 (file)
@@ -11,7 +11,7 @@ from ..utils import (
 class MetacriticIE(InfoExtractor):
     _VALID_URL = r'https?://www\.metacritic\.com/.+?/trailers/(?P<id>\d+)'
 
-    _TEST = {
+    _TESTS = [{
         'url': 'http://www.metacritic.com/game/playstation-4/infamous-second-son/trailers/3698222',
         'info_dict': {
             'id': '3698222',
@@ -20,7 +20,17 @@ class MetacriticIE(InfoExtractor):
             'description': 'Take a peak behind-the-scenes to see how Sucker Punch brings smoke into the universe of inFAMOUS Second Son on the PS4.',
             'duration': 221,
         },
-    }
+        'skip': 'Not providing trailers anymore',
+    }, {
+        'url': 'http://www.metacritic.com/game/playstation-4/tales-from-the-borderlands-a-telltale-game-series/trailers/5740315',
+        'info_dict': {
+            'id': '5740315',
+            'ext': 'mp4',
+            'title': 'Tales from the Borderlands - Finale: The Vault of the Traveler',
+            'description': 'In the final episode of the season, all hell breaks loose. Jack is now in control of Helios\' systems, and he\'s ready to reclaim his rightful place as king of Hyperion (with or without you).',
+            'duration': 114,
+        },
+    }]
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
diff --git a/youtube_dl/extractor/mgtv.py b/youtube_dl/extractor/mgtv.py
new file mode 100644 (file)
index 0000000..8f16a8f
--- /dev/null
@@ -0,0 +1,62 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import int_or_none
+
+
+class MGTVIE(InfoExtractor):  # extractor for www.mgtv.com video pages
+    _VALID_URL = r'https?://www\.mgtv\.com/v/(?:[^/]+/)*(?P<id>\d+)\.html'
+
+    _TEST = {
+        'url': 'http://www.mgtv.com/v/1/290525/f/3116640.html',
+        'md5': '',
+        'info_dict': {
+            'id': '3116640',
+            'ext': 'mp4',
+            'title': '我是歌手第四季双年巅峰会:韩红李玟“双王”领军对抗',
+            'description': '我是歌手第四季双年巅峰会',
+            'duration': 7461,
+            'thumbnail': r're:^https?://.*\.jpg$',
+        },
+        'params': {
+            'skip_download': True,  # m3u8 download
+        },
+    }
+
+    _FORMAT_MAP = {  # API stream name -> (format_id, preference)
+        '标清': ('Standard', 0),
+        '高清': ('High', 1),
+        '超清': ('SuperHigh', 2),
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        api_data = self._download_json(
+            'http://v.api.mgtv.com/player/video', video_id,
+            query={'video_id': video_id})['data']
+        info = api_data['info']
+
+        formats = []
+        for idx, stream in enumerate(api_data['stream']):
+            format_name = stream.get('name')
+            format_id, preference = self._FORMAT_MAP.get(format_name, (None, None))
+            format_info = self._download_json(  # per-stream JSON; its 'info' value is used as the format URL
+                stream['url'], video_id,  # parentheses below: % binds tighter than "or", so unmapped names log as '#<idx>'
+                note='Download video info for format %s' % (format_id or '#%d' % idx))
+            formats.append({
+                'format_id': format_id,
+                'url': format_info['info'],
+                'ext': 'mp4',  # These are m3u8 playlists
+                'preference': preference,
+            })
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': info['title'].strip(),
+            'formats': formats,
+            'description': info.get('desc'),
+            'duration': int_or_none(info.get('duration')),
+            'thumbnail': info.get('thumb'),
+        }
index 949ad11db2ecd0c53e5cb4c361bc43aa779cb1e6..e48eba3fa7343bbdf964be583a680affa5ad29fa 100644 (file)
@@ -1,8 +1,5 @@
 from __future__ import unicode_literals
 
-import json
-import re
-
 from .common import InfoExtractor
 from ..utils import (
     ExtractorError,
@@ -20,21 +17,28 @@ class MinistryGridIE(InfoExtractor):
             'id': '3453494717001',
             'ext': 'mp4',
             'title': 'The Gospel by Numbers',
+            'thumbnail': 're:^https?://.*\.jpg',
+            'upload_date': '20140410',
             'description': 'Coming soon from T4G 2014!',
-            'uploader': 'LifeWay Christian Resources (MG)',
+            'uploader_id': '2034960640001',
+            'timestamp': 1397145591,
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
         },
+        'add_ie': ['TDSLifeway'],
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
 
         webpage = self._download_webpage(url, video_id)
-        portlets_json = self._search_regex(
-            r'Liferay\.Portlet\.list=(\[.+?\])', webpage, 'portlet list')
-        portlets = json.loads(portlets_json)
+        portlets = self._parse_json(self._search_regex(
+            r'Liferay\.Portlet\.list=(\[.+?\])', webpage, 'portlet list'),
+            video_id)
         pl_id = self._search_regex(
-            r'<!--\s*p_l_id - ([0-9]+)<br>', webpage, 'p_l_id')
+            r'getPlid:function\(\){return"(\d+)"}', webpage, 'p_l_id')
 
         for i, portlet in enumerate(portlets):
             portlet_url = 'http://www.ministrygrid.com/c/portal/render_portlet?p_l_id=%s&p_p_id=%s' % (pl_id, portlet)
@@ -46,12 +50,8 @@ class MinistryGridIE(InfoExtractor):
                 r'<iframe.*?src="([^"]+)"', portlet_code, 'video iframe',
                 default=None)
             if video_iframe_url:
-                surl = smuggle_url(
-                    video_iframe_url, {'force_videoid': video_id})
-                return {
-                    '_type': 'url',
-                    'id': video_id,
-                    'url': surl,
-                }
+                return self.url_result(
+                    smuggle_url(video_iframe_url, {'force_videoid': video_id}),
+                    video_id=video_id)
 
         raise ExtractorError('Could not find video iframe in any portlets')
index 101497118275b7f1b5bf0564048f1dc9fc4b878b..483f6925fda989fc5111694c8c82f1807a1f3d97 100644 (file)
@@ -1,26 +1,35 @@
 from __future__ import unicode_literals
 
+import base64
+import functools
+import itertools
 import re
 
 from .common import InfoExtractor
-from ..compat import compat_urllib_parse_unquote
+from ..compat import (
+    compat_chr,
+    compat_ord,
+    compat_urllib_parse_unquote,
+    compat_urlparse,
+)
 from ..utils import (
+    clean_html,
     ExtractorError,
-    HEADRequest,
+    OnDemandPagedList,
     parse_count,
     str_to_int,
 )
 
 
 class MixcloudIE(InfoExtractor):
-    _VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/([^/]+)/([^/]+)'
+    _VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/([^/]+)/(?!stream|uploads|favorites|listens|playlists)([^/]+)'
     IE_NAME = 'mixcloud'
 
     _TESTS = [{
         'url': 'http://www.mixcloud.com/dholbach/cryptkeeper/',
         'info_dict': {
             'id': 'dholbach-cryptkeeper',
-            'ext': 'mp3',
+            'ext': 'm4a',
             'title': 'Cryptkeeper',
             'description': 'After quite a long silence from myself, finally another Drum\'n\'Bass mix with my favourite current dance floor bangers.',
             'uploader': 'Daniel Holbach',
@@ -38,22 +47,22 @@ class MixcloudIE(InfoExtractor):
             'description': 'md5:2b8aec6adce69f9d41724647c65875e8',
             'uploader': 'Gilles Peterson Worldwide',
             'uploader_id': 'gillespeterson',
-            'thumbnail': 're:https?://.*/images/',
+            'thumbnail': 're:https?://.*',
             'view_count': int,
             'like_count': int,
         },
     }]
 
-    def _check_url(self, url, track_id, ext):
-        try:
-            # We only want to know if the request succeed
-            # don't download the whole file
-            self._request_webpage(
-                HEADRequest(url), track_id,
-                'Trying %s URL' % ext)
-            return True
-        except ExtractorError:
-            return False
+    # See https://www.mixcloud.com/media/js2/www_js_2.9e23256562c080482435196ca3975ab5.js
+    @staticmethod
+    def _decrypt_play_info(play_info):
+        KEY = 'pleasedontdownloadourmusictheartistswontgetpaid'  # XOR key, repeated over the payload
+
+        play_info = base64.b64decode(play_info.encode('ascii'))  # base64 text -> raw bytes
+
+        return ''.join([  # XOR each decoded byte with the key character at idx % len(KEY)
+            compat_chr(compat_ord(ch) ^ compat_ord(KEY[idx % len(KEY)]))
+            for idx, ch in enumerate(play_info)])
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
@@ -63,14 +72,19 @@ class MixcloudIE(InfoExtractor):
 
         webpage = self._download_webpage(url, track_id)
 
-        preview_url = self._search_regex(
-            r'\s(?:data-preview-url|m-preview)="([^"]+)"', webpage, 'preview url')
-        song_url = re.sub(r'audiocdn(\d+)', r'stream\1', preview_url)
-        song_url = song_url.replace('/previews/', '/c/originals/')
-        if not self._check_url(song_url, track_id, 'mp3'):
-            song_url = song_url.replace('.mp3', '.m4a').replace('originals/', 'm4a/64/')
-            if not self._check_url(song_url, track_id, 'm4a'):
-                raise ExtractorError('Unable to extract track url')
+        message = self._html_search_regex(
+            r'(?s)<div[^>]+class="global-message cloudcast-disabled-notice-light"[^>]*>(.+?)<(?:a|/div)',
+            webpage, 'error message', default=None)
+
+        encrypted_play_info = self._search_regex(
+            r'm-play-info="([^"]+)"', webpage, 'play info')
+        play_info = self._parse_json(
+            self._decrypt_play_info(encrypted_play_info), track_id)
+
+        if message and 'stream_url' not in play_info:
+            raise ExtractorError('%s said: %s' % (self.IE_NAME, message), expected=True)
+
+        song_url = play_info['stream_url']
 
         PREFIX = (
             r'm-play-on-spacebar[^>]+'
@@ -105,3 +119,201 @@ class MixcloudIE(InfoExtractor):
             'view_count': view_count,
             'like_count': like_count,
         }
+
+
+class MixcloudPlaylistBaseIE(InfoExtractor):  # helpers shared by the Mixcloud user/playlist/stream extractors
+    _PAGE_SIZE = 24  # page size that subclasses hand to OnDemandPagedList
+
+    def _find_urls_in_page(self, page):  # yields one url_result per m-play-button m-url attribute in page
+        for url in re.findall(r'm-play-button m-url="(?P<url>[^"]+)"', page):
+            yield self.url_result(
+                compat_urlparse.urljoin('https://www.mixcloud.com', clean_html(url)),
+                MixcloudIE.ie_key())
+
+    def _fetch_tracks_page(self, path, video_id, page_name, current_page, real_page_number=None):
+        real_page_number = real_page_number or current_page + 1  # 'page' query value defaults to current_page + 1
+        return self._download_webpage(
+            'https://www.mixcloud.com/%s/' % path, video_id,
+            note='Download %s (page %d)' % (page_name, current_page + 1),
+            errnote='Unable to download %s' % page_name,
+            query={'page': real_page_number, 'list': 'main', '_ajax': '1'},
+            headers={'X-Requested-With': 'XMLHttpRequest'})
+
+    def _tracks_page_func(self, page, video_id, page_name, current_page):  # 'page' is forwarded as the URL path
+        resp = self._fetch_tracks_page(page, video_id, page_name, current_page)
+
+        for item in self._find_urls_in_page(resp):
+            yield item
+
+    def _get_user_description(self, page_content):  # non-fatal lookup of the description-text <div>
+        return self._html_search_regex(
+            r'<div[^>]+class="description-text"[^>]*>(.+?)</div>',
+            page_content, 'user description', fatal=False)
+
+
+class MixcloudUserIE(MixcloudPlaylistBaseIE):  # playlist of one user's uploads, favorites or listens
+    _VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/(?P<user>[^/]+)/(?P<type>uploads|favorites|listens)?/?$'
+    IE_NAME = 'mixcloud:user'
+
+    _TESTS = [{
+        'url': 'http://www.mixcloud.com/dholbach/',
+        'info_dict': {
+            'id': 'dholbach_uploads',
+            'title': 'Daniel Holbach (uploads)',
+            'description': 'md5:327af72d1efeb404a8216c27240d1370',
+        },
+        'playlist_mincount': 11,
+    }, {
+        'url': 'http://www.mixcloud.com/dholbach/uploads/',
+        'info_dict': {
+            'id': 'dholbach_uploads',
+            'title': 'Daniel Holbach (uploads)',
+            'description': 'md5:327af72d1efeb404a8216c27240d1370',
+        },
+        'playlist_mincount': 11,
+    }, {
+        'url': 'http://www.mixcloud.com/dholbach/favorites/',
+        'info_dict': {
+            'id': 'dholbach_favorites',
+            'title': 'Daniel Holbach (favorites)',
+            'description': 'md5:327af72d1efeb404a8216c27240d1370',
+        },
+        'params': {
+            'playlist_items': '1-100',
+        },
+        'playlist_mincount': 100,
+    }, {
+        'url': 'http://www.mixcloud.com/dholbach/listens/',
+        'info_dict': {
+            'id': 'dholbach_listens',
+            'title': 'Daniel Holbach (listens)',
+            'description': 'md5:327af72d1efeb404a8216c27240d1370',
+        },
+        'params': {
+            'playlist_items': '1-100',
+        },
+        'playlist_mincount': 100,
+    }]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        user_id = mobj.group('user')
+        list_type = mobj.group('type')
+
+        # if only a profile URL was supplied, default to download all uploads
+        if list_type is None:
+            list_type = 'uploads'
+
+        video_id = '%s_%s' % (user_id, list_type)  # playlist id is '<user>_<list_type>'
+
+        profile = self._download_webpage(
+            'https://www.mixcloud.com/%s/' % user_id, video_id,
+            note='Downloading user profile',
+            errnote='Unable to download user profile')
+
+        username = self._og_search_title(profile)
+        description = self._get_user_description(profile)
+
+        entries = OnDemandPagedList(
+            functools.partial(  # binds page path, video_id and page_name; current_page is left for the paged list
+                self._tracks_page_func,
+                '%s/%s' % (user_id, list_type), video_id, 'list of %s' % list_type),
+            self._PAGE_SIZE, use_cache=True)
+
+        return self.playlist_result(
+            entries, video_id, '%s (%s)' % (username, list_type), description)
+
+
+class MixcloudPlaylistIE(MixcloudPlaylistBaseIE):  # playlist extractor for /<user>/playlists/<playlist>/ URLs
+    _VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/(?P<user>[^/]+)/playlists/(?P<playlist>[^/]+)/?$'
+    IE_NAME = 'mixcloud:playlist'
+
+    _TESTS = [{
+        'url': 'https://www.mixcloud.com/RedBullThre3style/playlists/tokyo-finalists-2015/',
+        'info_dict': {
+            'id': 'RedBullThre3style_tokyo-finalists-2015',
+            'title': 'National Champions 2015',
+            'description': 'md5:6ff5fb01ac76a31abc9b3939c16243a3',
+        },
+        'playlist_mincount': 16,
+    }, {
+        'url': 'https://www.mixcloud.com/maxvibes/playlists/jazzcat-on-ness-radio/',
+        'info_dict': {
+            'id': 'maxvibes_jazzcat-on-ness-radio',
+            'title': 'Jazzcat on Ness Radio',
+            'description': 'md5:7bbbf0d6359a0b8cda85224be0f8f263',
+        },
+        'playlist_mincount': 23
+    }]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        user_id = mobj.group('user')
+        playlist_id = mobj.group('playlist')
+        video_id = '%s_%s' % (user_id, playlist_id)  # playlist id is '<user>_<playlist>'
+
+        profile = self._download_webpage(
+            url, user_id,  # note: the id passed for this download is user_id, not the combined video_id
+            note='Downloading playlist page',
+            errnote='Unable to download playlist page')
+
+        description = self._get_user_description(profile)
+        playlist_title = self._html_search_regex(
+            r'<span[^>]+class="[^"]*list-playlist-title[^"]*"[^>]*>(.*?)</span>',
+            profile, 'playlist title')
+
+        entries = OnDemandPagedList(
+            functools.partial(  # binds page path, video_id and page_name; current_page is left for the paged list
+                self._tracks_page_func,
+                '%s/playlists/%s' % (user_id, playlist_id), video_id, 'tracklist'),
+            self._PAGE_SIZE)
+
+        return self.playlist_result(entries, video_id, playlist_title, description)
+
+
+class MixcloudStreamIE(MixcloudPlaylistBaseIE):  # playlist extractor for /<user>/stream/ URLs
+    _VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/(?P<id>[^/]+)/stream/?$'
+    IE_NAME = 'mixcloud:stream'
+
+    _TEST = {
+        'url': 'https://www.mixcloud.com/FirstEar/stream/',
+        'info_dict': {
+            'id': 'FirstEar',
+            'title': 'First Ear',
+            'description': 'Curators of good music\nfirstearmusic.com',
+        },
+        'playlist_mincount': 192,
+    }
+
+    def _real_extract(self, url):
+        user_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, user_id)
+
+        entries = []  # filled eagerly by _handle_page, one page at a time
+        prev_page_url = None
+
+        def _handle_page(page):  # collects the page's entries and returns its m-next-page-url (or None)
+            entries.extend(self._find_urls_in_page(page))
+            return self._search_regex(
+                r'm-next-page-url="([^"]+)"', page,
+                'next page URL', default=None)
+
+        next_page_url = _handle_page(webpage)
+
+        for idx in itertools.count(0):
+            if not next_page_url or prev_page_url == next_page_url:  # stop when no next page is given or it repeats
+                break
+
+            prev_page_url = next_page_url
+            current_page = int(self._search_regex(  # page number to request comes from the ?page= value
+                r'\?page=(\d+)', next_page_url, 'next page number'))
+
+            next_page_url = _handle_page(self._fetch_tracks_page(
+                '%s/stream' % user_id, user_id, 'stream', idx,
+                real_page_number=current_page))
+
+        username = self._og_search_title(webpage)
+        description = self._get_user_description(webpage)
+
+        return self.playlist_result(entries, user_id, username, description)
diff --git a/youtube_dl/extractor/mooshare.py b/youtube_dl/extractor/mooshare.py
deleted file mode 100644 (file)
index a85109a..0000000
+++ /dev/null
@@ -1,110 +0,0 @@
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..utils import (
-    ExtractorError,
-    sanitized_Request,
-    urlencode_postdata,
-)
-
-
-class MooshareIE(InfoExtractor):
-    IE_NAME = 'mooshare'
-    IE_DESC = 'Mooshare.biz'
-    _VALID_URL = r'https?://(?:www\.)?mooshare\.biz/(?P<id>[\da-z]{12})'
-
-    _TESTS = [
-        {
-            'url': 'http://mooshare.biz/8dqtk4bjbp8g',
-            'md5': '4e14f9562928aecd2e42c6f341c8feba',
-            'info_dict': {
-                'id': '8dqtk4bjbp8g',
-                'ext': 'mp4',
-                'title': 'Comedy Football 2011 - (part 1-2)',
-                'duration': 893,
-            },
-        },
-        {
-            'url': 'http://mooshare.biz/aipjtoc4g95j',
-            'info_dict': {
-                'id': 'aipjtoc4g95j',
-                'ext': 'mp4',
-                'title': 'Orange Caramel  Dashing Through the Snow',
-                'duration': 212,
-            },
-            'params': {
-                # rtmp download
-                'skip_download': True,
-            }
-        }
-    ]
-
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-        page = self._download_webpage(url, video_id, 'Downloading page')
-
-        if re.search(r'>Video Not Found or Deleted<', page) is not None:
-            raise ExtractorError('Video %s does not exist' % video_id, expected=True)
-
-        hash_key = self._html_search_regex(r'<input type="hidden" name="hash" value="([^"]+)">', page, 'hash')
-        title = self._html_search_regex(r'(?m)<div class="blockTitle">\s*<h2>Watch ([^<]+)</h2>', page, 'title')
-
-        download_form = {
-            'op': 'download1',
-            'id': video_id,
-            'hash': hash_key,
-        }
-
-        request = sanitized_Request(
-            'http://mooshare.biz/%s' % video_id, urlencode_postdata(download_form))
-        request.add_header('Content-Type', 'application/x-www-form-urlencoded')
-
-        self._sleep(5, video_id)
-
-        video_page = self._download_webpage(request, video_id, 'Downloading video page')
-
-        thumbnail = self._html_search_regex(r'image:\s*"([^"]+)",', video_page, 'thumbnail', fatal=False)
-        duration_str = self._html_search_regex(r'duration:\s*"(\d+)",', video_page, 'duration', fatal=False)
-        duration = int(duration_str) if duration_str is not None else None
-
-        formats = []
-
-        # SD video
-        mobj = re.search(r'(?m)file:\s*"(?P<url>[^"]+)",\s*provider:', video_page)
-        if mobj is not None:
-            formats.append({
-                'url': mobj.group('url'),
-                'format_id': 'sd',
-                'format': 'SD',
-            })
-
-        # HD video
-        mobj = re.search(r'\'hd-2\': { file: \'(?P<url>[^\']+)\' },', video_page)
-        if mobj is not None:
-            formats.append({
-                'url': mobj.group('url'),
-                'format_id': 'hd',
-                'format': 'HD',
-            })
-
-        # rtmp video
-        mobj = re.search(r'(?m)file: "(?P<playpath>[^"]+)",\s*streamer: "(?P<rtmpurl>rtmp://[^"]+)",', video_page)
-        if mobj is not None:
-            formats.append({
-                'url': mobj.group('rtmpurl'),
-                'play_path': mobj.group('playpath'),
-                'rtmp_live': False,
-                'ext': 'mp4',
-                'format_id': 'rtmp',
-                'format': 'HD',
-            })
-
-        return {
-            'id': video_id,
-            'title': title,
-            'thumbnail': thumbnail,
-            'duration': duration,
-            'formats': formats,
-        }
index 50d92b50ae5ec2fa49e45cc64aea7f08cc21ccea..2174e5665778b590055c06255a91c030cb579d29 100644 (file)
@@ -1,17 +1,21 @@
 # encoding: utf-8
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
-from ..utils import int_or_none
+from ..compat import compat_urlparse
+from ..utils import (
+    int_or_none,
+    js_to_json,
+    mimetype2ext,
+)
 
 
 class MusicPlayOnIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:.+?\.)?musicplayon\.com/play(?:-touch)?\?(?:v|pl=100&play)=(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:.+?\.)?musicplayon\.com/play(?:-touch)?\?(?:v|pl=\d+&play)=(?P<id>\d+)'
 
-    _TEST = {
+    _TESTS = [{
         'url': 'http://en.musicplayon.com/play?v=433377',
+        'md5': '00cdcdea1726abdf500d1e7fd6dd59bb',
         'info_dict': {
             'id': '433377',
             'ext': 'mp4',
@@ -20,15 +24,16 @@ class MusicPlayOnIE(InfoExtractor):
             'duration': 342,
             'uploader': 'ultrafish',
         },
-        'params': {
-            # m3u8 download
-            'skip_download': True,
-        },
-    }
+    }, {
+        'url': 'http://en.musicplayon.com/play?pl=102&play=442629',
+        'only_matching': True,
+    }]
+
+    _URL_TEMPLATE = 'http://en.musicplayon.com/play?v=%s'
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
+        url = self._URL_TEMPLATE % video_id
 
         page = self._download_webpage(url, video_id)
 
@@ -40,28 +45,14 @@ class MusicPlayOnIE(InfoExtractor):
         uploader = self._html_search_regex(
             r'<div>by&nbsp;<a href="[^"]+" class="purple">([^<]+)</a></div>', page, 'uploader', fatal=False)
 
-        formats = [
-            {
-                'url': 'http://media0-eu-nl.musicplayon.com/stream-mobile?id=%s&type=.mp4' % video_id,
-                'ext': 'mp4',
-            }
-        ]
-
-        manifest = self._download_webpage(
-            'http://en.musicplayon.com/manifest.m3u8?v=%s' % video_id, video_id, 'Downloading manifest')
-
-        for entry in manifest.split('#')[1:]:
-            if entry.startswith('EXT-X-STREAM-INF:'):
-                meta, url, _ = entry.split('\n')
-                params = dict(param.split('=') for param in meta.split(',')[1:])
-                formats.append({
-                    'url': url,
-                    'ext': 'mp4',
-                    'tbr': int(params['BANDWIDTH']),
-                    'width': int(params['RESOLUTION'].split('x')[1]),
-                    'height': int(params['RESOLUTION'].split('x')[-1]),
-                    'format_note': params['NAME'].replace('"', '').strip(),
-                })
+        sources = self._parse_json(
+            self._search_regex(r'setup\[\'_sources\'\]\s*=\s*([^;]+);', page, 'video sources'),
+            video_id, transform_source=js_to_json)
+        formats = [{
+            'url': compat_urlparse.urljoin(url, source['src']),
+            'ext': mimetype2ext(source.get('type')),
+            'format_note': source.get('data-res'),
+        } for source in sources]
 
         return {
             'id': video_id,
diff --git a/youtube_dl/extractor/nerdist.py b/youtube_dl/extractor/nerdist.py
deleted file mode 100644 (file)
index c6dc34b..0000000
+++ /dev/null
@@ -1,80 +0,0 @@
-# encoding: utf-8
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-
-from ..utils import (
-    determine_ext,
-    parse_iso8601,
-    xpath_text,
-)
-
-
-class NerdistIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?nerdist\.com/vepisode/(?P<id>[^/?#]+)'
-    _TEST = {
-        'url': 'http://www.nerdist.com/vepisode/exclusive-which-dc-characters-w',
-        'md5': '3698ed582931b90d9e81e02e26e89f23',
-        'info_dict': {
-            'display_id': 'exclusive-which-dc-characters-w',
-            'id': 'RPHpvJyr',
-            'ext': 'mp4',
-            'title': 'Your TEEN TITANS Revealed! Who\'s on the show?',
-            'thumbnail': 're:^https?://.*/thumbs/.*\.jpg$',
-            'description': 'Exclusive: Find out which DC Comics superheroes will star in TEEN TITANS Live-Action TV Show on Nerdist News with Jessica Chobot!',
-            'uploader': 'Eric Diaz',
-            'upload_date': '20150202',
-            'timestamp': 1422892808,
-        }
-    }
-
-    def _real_extract(self, url):
-        display_id = self._match_id(url)
-        webpage = self._download_webpage(url, display_id)
-
-        video_id = self._search_regex(
-            r'''(?x)<script\s+(?:type="text/javascript"\s+)?
-                src="https?://content\.nerdist\.com/players/([a-zA-Z0-9_]+)-''',
-            webpage, 'video ID')
-        timestamp = parse_iso8601(self._html_search_meta(
-            'shareaholic:article_published_time', webpage, 'upload date'))
-        uploader = self._html_search_meta(
-            'shareaholic:article_author_name', webpage, 'article author')
-
-        doc = self._download_xml(
-            'http://content.nerdist.com/jw6/%s.xml' % video_id, video_id)
-        video_info = doc.find('.//item')
-        title = xpath_text(video_info, './title', fatal=True)
-        description = xpath_text(video_info, './description')
-        thumbnail = xpath_text(
-            video_info, './{http://rss.jwpcdn.com/}image', 'thumbnail')
-
-        formats = []
-        for source in video_info.findall('./{http://rss.jwpcdn.com/}source'):
-            vurl = source.attrib['file']
-            ext = determine_ext(vurl)
-            if ext == 'm3u8':
-                formats.extend(self._extract_m3u8_formats(
-                    vurl, video_id, entry_protocol='m3u8_native', ext='mp4',
-                    preference=0))
-            elif ext == 'smil':
-                formats.extend(self._extract_smil_formats(
-                    vurl, video_id, fatal=False
-                ))
-            else:
-                formats.append({
-                    'format_id': ext,
-                    'url': vurl,
-                })
-        self._sort_formats(formats)
-
-        return {
-            'id': video_id,
-            'display_id': display_id,
-            'title': title,
-            'description': description,
-            'thumbnail': thumbnail,
-            'timestamp': timestamp,
-            'formats': formats,
-            'uploader': uploader,
-        }
index 0d36474fa069b793ff32e90e6de1804de09058ac..978a05841ce68161330f9db24169dd330e51efc1 100644 (file)
@@ -89,6 +89,7 @@ class NetEaseMusicIE(NetEaseMusicBaseIE):
             'timestamp': 1431878400,
             'description': 'md5:a10a54589c2860300d02e1de821eb2ef',
         },
+        'skip': 'Blocked outside Mainland China',
     }, {
         'note': 'No lyrics translation.',
         'url': 'http://music.163.com/#/song?id=29822014',
@@ -101,6 +102,7 @@ class NetEaseMusicIE(NetEaseMusicBaseIE):
             'timestamp': 1419523200,
             'description': 'md5:a4d8d89f44656af206b7b2555c0bce6c',
         },
+        'skip': 'Blocked outside Mainland China',
     }, {
         'note': 'No lyrics.',
         'url': 'http://music.163.com/song?id=17241424',
@@ -112,6 +114,7 @@ class NetEaseMusicIE(NetEaseMusicBaseIE):
             'upload_date': '20080211',
             'timestamp': 1202745600,
         },
+        'skip': 'Blocked outside Mainland China',
     }, {
         'note': 'Has translated name.',
         'url': 'http://music.163.com/#/song?id=22735043',
@@ -124,7 +127,8 @@ class NetEaseMusicIE(NetEaseMusicBaseIE):
             'upload_date': '20100127',
             'timestamp': 1264608000,
             'alt_title': '说出愿望吧(Genie)',
-        }
+        },
+        'skip': 'Blocked outside Mainland China',
     }]
 
     def _process_lyrics(self, lyrics_info):
@@ -192,6 +196,7 @@ class NetEaseMusicAlbumIE(NetEaseMusicBaseIE):
             'title': 'B\'day',
         },
         'playlist_count': 23,
+        'skip': 'Blocked outside Mainland China',
     }
 
     def _real_extract(self, url):
@@ -223,6 +228,7 @@ class NetEaseMusicSingerIE(NetEaseMusicBaseIE):
             'title': '张惠妹 - aMEI;阿密特',
         },
         'playlist_count': 50,
+        'skip': 'Blocked outside Mainland China',
     }, {
         'note': 'Singer has translated name.',
         'url': 'http://music.163.com/#/artist?id=124098',
@@ -231,6 +237,7 @@ class NetEaseMusicSingerIE(NetEaseMusicBaseIE):
             'title': '李昇基 - 이승기',
         },
         'playlist_count': 50,
+        'skip': 'Blocked outside Mainland China',
     }]
 
     def _real_extract(self, url):
@@ -266,6 +273,7 @@ class NetEaseMusicListIE(NetEaseMusicBaseIE):
             'description': 'md5:12fd0819cab2965b9583ace0f8b7b022'
         },
         'playlist_count': 99,
+        'skip': 'Blocked outside Mainland China',
     }, {
         'note': 'Toplist/Charts sample',
         'url': 'http://music.163.com/#/discover/toplist?id=3733003',
@@ -275,6 +283,7 @@ class NetEaseMusicListIE(NetEaseMusicBaseIE):
             'description': 'md5:73ec782a612711cadc7872d9c1e134fc',
         },
         'playlist_count': 50,
+        'skip': 'Blocked outside Mainland China',
     }]
 
     def _real_extract(self, url):
@@ -314,6 +323,7 @@ class NetEaseMusicMvIE(NetEaseMusicBaseIE):
             'creator': '白雅言',
             'upload_date': '20150520',
         },
+        'skip': 'Blocked outside Mainland China',
     }
 
     def _real_extract(self, url):
@@ -357,6 +367,7 @@ class NetEaseMusicProgramIE(NetEaseMusicBaseIE):
             'upload_date': '20150613',
             'duration': 900,
         },
+        'skip': 'Blocked outside Mainland China',
     }, {
         'note': 'This program has accompanying songs.',
         'url': 'http://music.163.com/#/program?id=10141022',
@@ -366,6 +377,7 @@ class NetEaseMusicProgramIE(NetEaseMusicBaseIE):
             'description': 'md5:8d594db46cc3e6509107ede70a4aaa3b',
         },
         'playlist_count': 4,
+        'skip': 'Blocked outside Mainland China',
     }, {
         'note': 'This program has accompanying songs.',
         'url': 'http://music.163.com/#/program?id=10141022',
@@ -379,7 +391,8 @@ class NetEaseMusicProgramIE(NetEaseMusicBaseIE):
         },
         'params': {
             'noplaylist': True
-        }
+        },
+        'skip': 'Blocked outside Mainland China',
     }]
 
     def _real_extract(self, url):
@@ -438,6 +451,7 @@ class NetEaseMusicDjRadioIE(NetEaseMusicBaseIE):
             'description': 'md5:766220985cbd16fdd552f64c578a6b15'
         },
         'playlist_mincount': 40,
+        'skip': 'Blocked outside Mainland China',
     }
     _PAGE_SIZE = 1000
 
index cd117b04edeff88d90842f2ed8e15a8c43bde714..7059403239ce19ac8b2861fa4af0dde93c98467b 100644 (file)
@@ -7,8 +7,8 @@ from .common import InfoExtractor
 
 
 class NewgroundsIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?newgrounds\.com/audio/listen/(?P<id>[0-9]+)'
-    _TEST = {
+    _VALID_URL = r'https?://(?:www\.)?newgrounds\.com/(?:audio/listen|portal/view)/(?P<id>[0-9]+)'
+    _TESTS = [{
         'url': 'http://www.newgrounds.com/audio/listen/549479',
         'md5': 'fe6033d297591288fa1c1f780386f07a',
         'info_dict': {
@@ -17,7 +17,16 @@ class NewgroundsIE(InfoExtractor):
             'title': 'B7 - BusMode',
             'uploader': 'Burn7',
         }
-    }
+    }, {
+        'url': 'http://www.newgrounds.com/portal/view/673111',
+        'md5': '3394735822aab2478c31b1004fe5e5bc',
+        'info_dict': {
+            'id': '673111',
+            'ext': 'mp4',
+            'title': 'Dancin',
+            'uploader': 'Squirrelman82',
+        },
+    }]
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
@@ -25,9 +34,11 @@ class NewgroundsIE(InfoExtractor):
         webpage = self._download_webpage(url, music_id)
 
         title = self._html_search_regex(
-            r',"name":"([^"]+)",', webpage, 'music title')
+            r'<title>([^>]+)</title>', webpage, 'title')
+
         uploader = self._html_search_regex(
-            r',"artist":"([^"]+)",', webpage, 'music uploader')
+            [r',"artist":"([^"]+)",', r'[\'"]owner[\'"]\s*:\s*[\'"]([^\'"]+)[\'"],'],
+            webpage, 'uploader')
 
         music_url_json_string = self._html_search_regex(
             r'({"url":"[^"]+"),', webpage, 'music url') + '}'
index 5771a675dc245ee2687d0d4644c833dc7de5d0e4..3bbd4735502e113fcc46a07981ff5863c52fef15 100644 (file)
@@ -16,7 +16,14 @@ class NovaMovIE(InfoExtractor):
     IE_NAME = 'novamov'
     IE_DESC = 'NovaMov'
 
-    _VALID_URL_TEMPLATE = r'http://(?:(?:www\.)?%(host)s/(?:file|video|mobile/#/videos)/|(?:(?:embed|www)\.)%(host)s/embed\.php\?(?:.*?&)?v=)(?P<id>[a-z\d]{13})'
+    _VALID_URL_TEMPLATE = r'''(?x)
+                            http://
+                                (?:
+                                    (?:www\.)?%(host)s/(?:file|video|mobile/\#/videos)/|
+                                    (?:(?:embed|www)\.)%(host)s/embed(?:\.php|/)?\?(?:.*?&)?\bv=
+                                )
+                                (?P<id>[a-z\d]{13})
+                            '''
     _VALID_URL = _VALID_URL_TEMPLATE % {'host': 'novamov\.com'}
 
     _HOST = 'www.novamov.com'
@@ -189,7 +196,7 @@ class AuroraVidIE(NovaMovIE):
 
     _FILE_DELETED_REGEX = r'This file no longer exists on our servers!<'
 
-    _TEST = {
+    _TESTS = [{
         'url': 'http://www.auroravid.to/video/4rurhn9x446jj',
         'md5': '7205f346a52bbeba427603ba10d4b935',
         'info_dict': {
@@ -199,4 +206,7 @@ class AuroraVidIE(NovaMovIE):
             'description': 'search engine optimization is used to rank the web page in the google search engine'
         },
         'skip': '"Invalid token" errors abound (in web interface as well as youtube-dl, there is nothing we can do about it.)'
-    }
+    }, {
+        'url': 'http://www.auroravid.to/embed/?v=4rurhn9x446jj',
+        'only_matching': True,
+    }]
index 0f1f448fe3126670932b371498b73b0bf0a7924e..6e843c327603f19560e2bb35a2d5d462fe4516c3 100644 (file)
@@ -4,7 +4,10 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..utils import determine_ext
+from ..utils import (
+    determine_ext,
+    int_or_none,
+)
 
 
 class OnionStudiosIE(InfoExtractor):
@@ -17,7 +20,7 @@ class OnionStudiosIE(InfoExtractor):
             'id': '2937',
             'ext': 'mp4',
             'title': 'Hannibal charges forward, stops for a cocktail',
-            'description': 'md5:545299bda6abf87e5ec666548c6a9448',
+            'description': 'md5:e786add7f280b7f0fe237b64cc73df76',
             'thumbnail': 're:^https?://.*\.jpg$',
             'uploader': 'The A.V. Club',
             'uploader_id': 'TheAVClub',
@@ -42,9 +45,19 @@ class OnionStudiosIE(InfoExtractor):
 
         formats = []
         for src in re.findall(r'<source[^>]+src="([^"]+)"', webpage):
-            if determine_ext(src) != 'm3u8':  # m3u8 always results in 403
+            ext = determine_ext(src)
+            if ext == 'm3u8':
+                formats.extend(self._extract_m3u8_formats(
+                    src, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
+            else:
+                height = int_or_none(self._search_regex(
+                    r'/(\d+)\.%s' % ext, src, 'height', default=None))
                 formats.append({
+                    'format_id': ext + ('-%sp' % height if height else ''),
                     'url': src,
+                    'height': height,
+                    'ext': ext,
+                    'preference': 1,
                 })
         self._sort_formats(formats)
 
diff --git a/youtube_dl/extractor/people.py b/youtube_dl/extractor/people.py
new file mode 100644 (file)
index 0000000..9ecdbc1
--- /dev/null
@@ -0,0 +1,32 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class PeopleIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?people\.com/people/videos/0,,(?P<id>\d+),00\.html'
+
+    _TEST = {
+        'url': 'http://www.people.com/people/videos/0,,20995451,00.html',
+        'info_dict': {
+            'id': 'ref:20995451',
+            'ext': 'mp4',
+            'title': 'Astronaut Love Triangle Victim Speaks Out: “The Crime in 2007 Hasn’t Defined Us”',
+            'description': 'Colleen Shipman speaks to PEOPLE for the first time about life after the attack',
+            'thumbnail': 're:^https?://.*\.jpg',
+            'duration': 246.318,
+            'timestamp': 1458720585,
+            'upload_date': '20160323',
+            'uploader_id': '416418724',
+        },
+        'params': {
+            'skip_download': True,
+        },
+        'add_ie': ['BrightcoveNew'],
+    }
+
+    def _real_extract(self, url):
+        return self.url_result(
+            'http://players.brightcove.net/416418724/default_default/index.html?videoId=ref:%s'
+            % self._match_id(url), 'BrightcoveNew')
diff --git a/youtube_dl/extractor/presstv.py b/youtube_dl/extractor/presstv.py
new file mode 100644 (file)
index 0000000..2da93ed
--- /dev/null
@@ -0,0 +1,74 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import remove_start
+
+
+class PressTVIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?presstv\.ir/[^/]+/(?P<y>\d+)/(?P<m>\d+)/(?P<d>\d+)/(?P<id>\d+)/(?P<display_id>[^/]+)?'
+
+    _TEST = {
+        'url': 'http://www.presstv.ir/Detail/2016/04/09/459911/Australian-sewerage-treatment-facility-/',
+        'md5': '5d7e3195a447cb13e9267e931d8dd5a5',
+        'info_dict': {
+            'id': '459911',
+            'display_id': 'Australian-sewerage-treatment-facility-',
+            'ext': 'mp4',
+            'title': 'Organic mattresses used to clean waste water',
+            'upload_date': '20160409',
+            'thumbnail': 're:^https?://.*\.jpg',
+            'description': 'md5:20002e654bbafb6908395a5c0cfcd125'
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        display_id = mobj.group('display_id') or video_id
+
+        webpage = self._download_webpage(url, display_id)
+
+        # extract video URL from webpage
+        video_url = self._hidden_inputs(webpage)['inpPlayback']
+
+        # build list of available formats
+        # specified in http://www.presstv.ir/Scripts/playback.js
+        base_url = 'http://192.99.219.222:82/presstv'
+        _formats = [
+            (180, '_low200.mp4'),
+            (360, '_low400.mp4'),
+            (720, '_low800.mp4'),
+            (1080, '.mp4')
+        ]
+
+        formats = [{
+            'url': base_url + video_url[:-4] + extension,
+            'format_id': '%dp' % height,
+            'height': height,
+        } for height, extension in _formats]
+
+        # extract video metadata
+        title = remove_start(
+            self._html_search_meta('title', webpage, fatal=True), 'PressTV-')
+
+        thumbnail = self._og_search_thumbnail(webpage)
+        description = self._og_search_description(webpage)
+
+        upload_date = '%04d%02d%02d' % (
+            int(mobj.group('y')),
+            int(mobj.group('m')),
+            int(mobj.group('d')),
+        )
+
+        return {
+            'id': video_id,
+            'display_id': display_id,
+            'title': title,
+            'formats': formats,
+            'thumbnail': thumbnail,
+            'upload_date': upload_date,
+            'description': description
+        }
index cce84b9e4d95e53731f01d334830faac9f1e008d..fca30e1aae5b35f9ef439fccc8396b5127f79aa9 100644 (file)
@@ -40,7 +40,7 @@ class Puls4IE(InfoExtractor):
         webpage = self._download_webpage(url, video_id)
 
         error_message = self._html_search_regex(
-            r'<div class="message-error">(.+?)</div>',
+            r'<div[^>]+class="message-error"[^>]*>(.+?)</div>',
             webpage, 'error message', default=None)
         if error_message:
             raise ExtractorError(
diff --git a/youtube_dl/extractor/quickvid.py b/youtube_dl/extractor/quickvid.py
deleted file mode 100644 (file)
index f414e23..0000000
+++ /dev/null
@@ -1,54 +0,0 @@
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..compat import (
-    compat_urlparse,
-)
-from ..utils import (
-    determine_ext,
-    int_or_none,
-)
-
-
-class QuickVidIE(InfoExtractor):
-    _VALID_URL = r'https?://(www\.)?quickvid\.org/watch\.php\?v=(?P<id>[a-zA-Z_0-9-]+)'
-    _TEST = {
-        'url': 'http://quickvid.org/watch.php?v=sUQT3RCG8dx',
-        'md5': 'c0c72dd473f260c06c808a05d19acdc5',
-        'info_dict': {
-            'id': 'sUQT3RCG8dx',
-            'ext': 'mp4',
-            'title': 'Nick Offerman\'s Summer Reading Recap',
-            'thumbnail': 're:^https?://.*\.(?:png|jpg|gif)$',
-            'view_count': int,
-        },
-        'skip': 'Not accessible from Travis CI server',
-    }
-
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-        webpage = self._download_webpage(url, video_id)
-
-        title = self._html_search_regex(r'<h2>(.*?)</h2>', webpage, 'title')
-        view_count = int_or_none(self._html_search_regex(
-            r'(?s)<div id="views">(.*?)</div>',
-            webpage, 'view count', fatal=False))
-        video_code = self._search_regex(
-            r'(?s)<video id="video"[^>]*>(.*?)</video>', webpage, 'video code')
-        formats = [
-            {
-                'url': compat_urlparse.urljoin(url, src),
-                'format_id': determine_ext(src, None),
-            } for src in re.findall('<source\s+src="([^"]+)"', video_code)
-        ]
-        self._sort_formats(formats)
-
-        return {
-            'id': video_id,
-            'title': title,
-            'formats': formats,
-            'thumbnail': self._og_search_thumbnail(webpage),
-            'view_count': view_count,
-        }
index e8bb20a0803700937875355d2f854d1de88cea1a..f9cd48790c3b4a92b82bf1880020d53a074b1434 100644 (file)
@@ -1,11 +1,11 @@
 from __future__ import unicode_literals
 
-from .videodetective import VideoDetectiveIE
+from .common import InfoExtractor
+from ..compat import compat_urlparse
+from .internetvideoarchive import InternetVideoArchiveIE
 
 
-# It just uses the same method as videodetective.com,
-# the internetvideoarchive.com is extracted from the og:video property
-class RottenTomatoesIE(VideoDetectiveIE):
+class RottenTomatoesIE(InfoExtractor):
     _VALID_URL = r'https?://www\.rottentomatoes\.com/m/[^/]+/trailers/(?P<id>\d+)'
 
     _TEST = {
@@ -13,7 +13,19 @@ class RottenTomatoesIE(VideoDetectiveIE):
         'info_dict': {
             'id': '613340',
             'ext': 'mp4',
-            'title': 'TOY STORY 3',
-            'description': 'From the creators of the beloved TOY STORY films, comes a story that will reunite the gang in a whole new way.',
+            'title': 'Toy Story 3',
         },
     }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+        og_video = self._og_search_video_url(webpage)
+        query = compat_urlparse.urlparse(og_video).query
+
+        return {
+            '_type': 'url_transparent',
+            'url': InternetVideoArchiveIE._build_xml_url(query),
+            'ie_key': InternetVideoArchiveIE.ie_key(),
+            'title': self._og_search_title(webpage),
+        }
index 05337421ca4210af5a9a797f22c112bb663a0960..7a88a42cd84dbfd9f343567dffb5f462c10329b7 100644 (file)
@@ -1,15 +1,11 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
-from .common import InfoExtractor
-from ..compat import compat_urlparse
-from ..utils import (
-    ExtractorError,
-    js_to_json,
-)
+from .jwplatform import JWPlatformBaseIE
+from ..utils import js_to_json
 
 
-class ScreencastOMaticIE(InfoExtractor):
+class ScreencastOMaticIE(JWPlatformBaseIE):
     _VALID_URL = r'https?://screencast-o-matic\.com/watch/(?P<id>[0-9a-zA-Z]+)'
     _TEST = {
         'url': 'http://screencast-o-matic.com/watch/c2lD3BeOPl',
@@ -20,6 +16,7 @@ class ScreencastOMaticIE(InfoExtractor):
             'title': 'Welcome to 3-4 Philosophy @ DECV!',
             'thumbnail': 're:^https?://.*\.jpg$',
             'description': 'as the title says! also: some general info re 1) VCE philosophy and 2) distance learning.',
+            'duration': 369.163,
         }
     }
 
@@ -27,23 +24,14 @@ class ScreencastOMaticIE(InfoExtractor):
         video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
 
-        setup_js = self._search_regex(
-            r"(?s)jwplayer\('mp4Player'\).setup\((\{.*?\})\);",
-            webpage, 'setup code')
-        data = self._parse_json(setup_js, video_id, transform_source=js_to_json)
-        try:
-            video_data = next(
-                m for m in data['modes'] if m.get('type') == 'html5')
-        except StopIteration:
-            raise ExtractorError('Could not find any video entries!')
-        video_url = compat_urlparse.urljoin(url, video_data['config']['file'])
-        thumbnail = data.get('image')
+        jwplayer_data = self._parse_json(
+            self._search_regex(
+                r"(?s)jwplayer\('mp4Player'\).setup\((\{.*?\})\);", webpage, 'setup code'),
+            video_id, transform_source=js_to_json)
 
-        return {
-            'id': video_id,
+        info_dict = self._parse_jwplayer_data(jwplayer_data, video_id, require_title=False)
+        info_dict.update({
             'title': self._og_search_title(webpage),
             'description': self._og_search_description(webpage),
-            'url': video_url,
-            'ext': 'mp4',
-            'thumbnail': thumbnail,
-        }
+        })
+        return info_dict
index 4f0c66213cc1269f44f4c0d9672a83ba3999d441..e5c28ae890ee61536052a5716677d486d0a5b43e 100644 (file)
@@ -6,6 +6,7 @@ import re
 from .common import InfoExtractor
 from ..compat import compat_urlparse
 from ..utils import (
+    js_to_json,
     unified_strdate,
 )
 
@@ -94,19 +95,32 @@ class SportBoxEmbedIE(InfoExtractor):
 
         webpage = self._download_webpage(url, video_id)
 
-        hls = self._search_regex(
-            r"sportboxPlayer\.jwplayer_common_params\.file\s*=\s*['\"]([^'\"]+)['\"]",
-            webpage, 'hls file')
+        formats = []
 
-        formats = self._extract_m3u8_formats(hls, video_id, 'mp4')
-        self._sort_formats(formats)
+        def cleanup_js(code):
+            # desktop_advert_config contains complex Javascripts and we don't need it
+            return js_to_json(re.sub(r'desktop_advert_config.*', '', code))
+
+        jwplayer_data = self._parse_json(self._search_regex(
+            r'(?s)player\.setup\(({.+?})\);', webpage, 'jwplayer settings'), video_id,
+            transform_source=cleanup_js)
+
+        hls_url = jwplayer_data.get('hls_url')
+        if hls_url:
+            formats.extend(self._extract_m3u8_formats(
+                hls_url, video_id, ext='mp4', m3u8_id='hls'))
 
-        title = self._search_regex(
-            r'sportboxPlayer\.node_title\s*=\s*"([^"]+)"', webpage, 'title')
+        rtsp_url = jwplayer_data.get('rtsp_url')
+        if rtsp_url:
+            formats.append({
+                'url': rtsp_url,
+                'format_id': 'rtsp',
+            })
+
+        self._sort_formats(formats)
 
-        thumbnail = self._search_regex(
-            r'sportboxPlayer\.jwplayer_common_params\.image\s*=\s*"([^"]+)"',
-            webpage, 'thumbnail', default=None)
+        title = jwplayer_data['node_title']
+        thumbnail = jwplayer_data.get('image_url')
 
         return {
             'id': video_id,
index 6a57fa60a5a2ea877f65a1af045f14d05377c1a9..e529051d100b8024007229200648ea259b3d1677 100644 (file)
@@ -14,7 +14,6 @@ class StreetVoiceIE(InfoExtractor):
         'info_dict': {
             'id': '94440',
             'ext': 'mp3',
-            'filesize': 4167053,
             'title': '輸',
             'description': 'Crispy脆樂團 - 輸',
             'thumbnail': 're:^https?://.*\.jpg$',
@@ -32,20 +31,19 @@ class StreetVoiceIE(InfoExtractor):
         song_id = self._match_id(url)
 
         song = self._download_json(
-            'http://streetvoice.com/music/api/song/%s' % song_id, song_id)
+            'https://streetvoice.com/api/v1/public/song/%s/' % song_id, song_id, data=b'')
 
         title = song['name']
-        author = song['musician']['name']
+        author = song['user']['nickname']
 
         return {
             'id': song_id,
             'url': song['file'],
-            'filesize': song.get('size'),
             'title': title,
             'description': '%s - %s' % (author, title),
             'thumbnail': self._proto_relative_url(song.get('image'), 'http:'),
             'duration': song.get('length'),
             'upload_date': unified_strdate(song.get('created_at')),
             'uploader': author,
-            'uploader_id': compat_str(song['musician']['id']),
+            'uploader_id': compat_str(song['user']['id']),
         }
diff --git a/youtube_dl/extractor/tdslifeway.py b/youtube_dl/extractor/tdslifeway.py
new file mode 100644 (file)
index 0000000..4d1f5c8
--- /dev/null
@@ -0,0 +1,33 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class TDSLifewayIE(InfoExtractor):
+    _VALID_URL = r'https?://tds\.lifeway\.com/v1/trainingdeliverysystem/courses/(?P<id>\d+)/index\.html'
+
+    _TEST = {
+        # From http://www.ministrygrid.com/training-viewer/-/training/t4g-2014-conference/the-gospel-by-numbers-4/the-gospel-by-numbers
+        'url': 'http://tds.lifeway.com/v1/trainingdeliverysystem/courses/3453494717001/index.html?externalRegistration=AssetId%7C34F466F1-78F3-4619-B2AB-A8EFFA55E9E9%21InstanceId%7C0%21UserId%7Caaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa&grouping=http%3A%2F%2Flifeway.com%2Fvideo%2F3453494717001&activity_id=http%3A%2F%2Flifeway.com%2Fvideo%2F3453494717001&content_endpoint=http%3A%2F%2Ftds.lifeway.com%2Fv1%2Ftrainingdeliverysystem%2FScormEngineInterface%2FTCAPI%2Fcontent%2F&actor=%7B%22name%22%3A%5B%22Guest%20Guest%22%5D%2C%22account%22%3A%5B%7B%22accountServiceHomePage%22%3A%22http%3A%2F%2Fscorm.lifeway.com%2F%22%2C%22accountName%22%3A%22aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa%22%7D%5D%2C%22objectType%22%3A%22Agent%22%7D&content_token=462a50b2-b6f9-4970-99b1-930882c499fb&registration=93d6ec8e-7f7b-4ed3-bbc8-a857913c0b2a&externalConfiguration=access%7CFREE%21adLength%7C-1%21assignOrgId%7C4AE36F78-299A-425D-91EF-E14A899B725F%21assignOrgParentId%7C%21courseId%7C%21isAnonymous%7Cfalse%21previewAsset%7Cfalse%21previewLength%7C-1%21previewMode%7Cfalse%21royalty%7CFREE%21sessionId%7C671422F9-8E79-48D4-9C2C-4EE6111EA1CD%21trackId%7C&auth=Basic%20OjhmZjk5MDBmLTBlYTMtNDJhYS04YjFlLWE4MWQ3NGNkOGRjYw%3D%3D&endpoint=http%3A%2F%2Ftds.lifeway.com%2Fv1%2Ftrainingdeliverysystem%2FScormEngineInterface%2FTCAPI%2F',
+        'info_dict': {
+            'id': '3453494717001',
+            'ext': 'mp4',
+            'title': 'The Gospel by Numbers',
+            'thumbnail': 're:^https?://.*\.jpg',
+            'upload_date': '20140410',
+            'description': 'Coming soon from T4G 2014!',
+            'uploader_id': '2034960640001',
+            'timestamp': 1397145591,
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
+        'add_ie': ['BrightcoveNew'],
+    }
+
+    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/2034960640001/default_default/index.html?videoId=%s'
+
+    def _real_extract(self, url):
+        brightcove_id = self._match_id(url)
+        return self.url_result(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew', brightcove_id)
index a3d05f97d681b6cb4da6adf179a4f0a5744e5123..eefecc490c5d13476259497e79f7a3ebe68caee7 100644 (file)
@@ -1,11 +1,13 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
+import re
+
 from .common import InfoExtractor
 
 
 class TeleBruxellesIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?telebruxelles\.be/(news|sport|dernier-jt)/?(?P<id>[^/#?]+)'
+    _VALID_URL = r'https?://(?:www\.)?(?:telebruxelles|bx1)\.be/(news|sport|dernier-jt)/?(?P<id>[^/#?]+)'
     _TESTS = [{
         'url': 'http://www.telebruxelles.be/news/auditions-devant-parlement-francken-galant-tres-attendus/',
         'md5': '59439e568c9ee42fb77588b2096b214f',
@@ -39,18 +41,18 @@ class TeleBruxellesIE(InfoExtractor):
         webpage = self._download_webpage(url, display_id)
 
         article_id = self._html_search_regex(
-            r"<article id=\"post-(\d+)\"", webpage, 'article ID')
+            r"<article id=\"post-(\d+)\"", webpage, 'article ID', default=None)
         title = self._html_search_regex(
             r'<h1 class=\"entry-title\">(.*?)</h1>', webpage, 'title')
-        description = self._og_search_description(webpage)
+        description = self._og_search_description(webpage, default=None)
 
         rtmp_url = self._html_search_regex(
-            r"file: \"(rtmp://\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}:\d{1,5}/vod/mp4:\" \+ \"\w+\" \+ \".mp4)\"",
+            r'file\s*:\s*"(rtmp://[^/]+/vod/mp4:"\s*\+\s*"[^"]+"\s*\+\s*".mp4)"',
             webpage, 'RTMP url')
-        rtmp_url = rtmp_url.replace("\" + \"", "")
+        rtmp_url = re.sub(r'"\s*\+\s*"', '', rtmp_url)
 
         return {
-            'id': article_id,
+            'id': article_id or display_id,
             'display_id': display_id,
             'title': title,
             'description': description,
diff --git a/youtube_dl/extractor/theonion.py b/youtube_dl/extractor/theonion.py
deleted file mode 100644 (file)
index 10239c9..0000000
+++ /dev/null
@@ -1,63 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-
-
-class TheOnionIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?theonion\.com/video/[^,]+,(?P<id>[0-9]+)/?'
-    _TEST = {
-        'url': 'http://www.theonion.com/video/man-wearing-mm-jacket-gods-image,36918/',
-        'md5': '19eaa9a39cf9b9804d982e654dc791ee',
-        'info_dict': {
-            'id': '2133',
-            'ext': 'mp4',
-            'title': 'Man Wearing M&M Jacket Apparently Made In God\'s Image',
-            'description': 'md5:cc12448686b5600baae9261d3e180910',
-            'thumbnail': 're:^https?://.*\.jpg\?\d+$',
-        }
-    }
-
-    def _real_extract(self, url):
-        display_id = self._match_id(url)
-        webpage = self._download_webpage(url, display_id)
-
-        video_id = self._search_regex(
-            r'"videoId":\s(\d+),', webpage, 'video ID')
-        title = self._og_search_title(webpage)
-        description = self._og_search_description(webpage)
-        thumbnail = self._og_search_thumbnail(webpage)
-
-        sources = re.findall(r'<source src="([^"]+)" type="([^"]+)"', webpage)
-        formats = []
-        for src, type_ in sources:
-            if type_ == 'video/mp4':
-                formats.append({
-                    'format_id': 'mp4_sd',
-                    'preference': 1,
-                    'url': src,
-                })
-            elif type_ == 'video/webm':
-                formats.append({
-                    'format_id': 'webm_sd',
-                    'preference': 0,
-                    'url': src,
-                })
-            elif type_ == 'application/x-mpegURL':
-                formats.extend(
-                    self._extract_m3u8_formats(src, display_id, preference=-1))
-            else:
-                self.report_warning(
-                    'Encountered unexpected format: %s' % type_)
-        self._sort_formats(formats)
-
-        return {
-            'id': video_id,
-            'display_id': display_id,
-            'title': title,
-            'formats': formats,
-            'thumbnail': thumbnail,
-            'description': description,
-        }
index 6da701a39fff3ebec1bf9aa0cdf095b904a83cf4..7a5a533b7473bc483e64915ed2065c12c9adbdc6 100644 (file)
@@ -50,8 +50,6 @@ class ThePlatformBaseIE(OnceIE):
             else:
                 formats.append(_format)
 
-        self._sort_formats(formats)
-
         subtitles = self._parse_smil_subtitles(meta, default_ns)
 
         return formats, subtitles
@@ -241,6 +239,7 @@ class ThePlatformIE(ThePlatformBaseIE):
             smil_url = self._sign_url(smil_url, sig['key'], sig['secret'])
 
         formats, subtitles = self._extract_theplatform_smil(smil_url, video_id)
+        self._sort_formats(formats)
 
         ret = self.get_metadata(path, video_id)
         combined_subtitles = self._merge_subtitles(ret.get('subtitles', {}), subtitles)
index 79f036fe4eca77f57ddd9e1fd912317f9af00ba5..78174178e6ef69362462f96f997b7a37a640a275 100644 (file)
@@ -76,7 +76,11 @@ class TNAFlixNetworkBaseIE(InfoExtractor):
         webpage = self._download_webpage(url, display_id)
 
         cfg_url = self._proto_relative_url(self._html_search_regex(
-            self._CONFIG_REGEX, webpage, 'flashvars.config'), 'http:')
+            self._CONFIG_REGEX, webpage, 'flashvars.config', default=None), 'http:')
+
+        if not cfg_url:
+            inputs = self._hidden_inputs(webpage)
+            cfg_url = 'https://cdn-fck.tnaflix.com/tnaflix/%s.fid?key=%s' % (inputs['vkey'], inputs['nkey'])
 
         cfg_xml = self._download_xml(
             cfg_url, display_id, 'Downloading metadata',
@@ -132,7 +136,7 @@ class TNAFlixNetworkBaseIE(InfoExtractor):
         average_rating = float_or_none(extract_field(self._AVERAGE_RATING_REGEX, 'average rating'))
 
         categories_str = extract_field(self._CATEGORIES_REGEX, 'categories')
-        categories = categories_str.split(', ') if categories_str is not None else []
+        categories = [c.strip() for c in categories_str.split(',')] if categories_str is not None else []
 
         return {
             'id': video_id,
@@ -186,13 +190,14 @@ class TNAFlixIE(TNAFlixNetworkBaseIE):
     _VALID_URL = r'https?://(?:www\.)?tnaflix\.com/[^/]+/(?P<display_id>[^/]+)/video(?P<id>\d+)'
 
     _TITLE_REGEX = r'<title>(.+?) - TNAFlix Porn Videos</title>'
-    _DESCRIPTION_REGEX = r'<h3 itemprop="description">([^<]+)</h3>'
-    _UPLOADER_REGEX = r'(?s)<span[^>]+class="infoTitle"[^>]*>Uploaded By:</span>(.+?)<div'
+    _DESCRIPTION_REGEX = r'<meta[^>]+name="description"[^>]+content="([^"]+)"'
+    _UPLOADER_REGEX = r'<i>\s*Verified Member\s*</i>\s*<h1>(.+?)</h1>'
+    _CATEGORIES_REGEX = r'(?s)<span[^>]*>Categories:</span>(.+?)</div>'
 
     _TESTS = [{
         # anonymous uploader, no categories
         'url': 'http://www.tnaflix.com/porn-stars/Carmella-Decesare-striptease/video553878',
-        'md5': 'ecf3498417d09216374fc5907f9c6ec0',
+        'md5': '7e569419fe6d69543d01e6be22f5f7c4',
         'info_dict': {
             'id': '553878',
             'display_id': 'Carmella-Decesare-striptease',
@@ -201,17 +206,16 @@ class TNAFlixIE(TNAFlixNetworkBaseIE):
             'thumbnail': 're:https?://.*\.jpg$',
             'duration': 91,
             'age_limit': 18,
-            'uploader': 'Anonymous',
-            'categories': [],
+            'categories': ['Porn Stars'],
         }
     }, {
         # non-anonymous uploader, categories
         'url': 'https://www.tnaflix.com/teen-porn/Educational-xxx-video/video6538',
-        'md5': '0f5d4d490dbfd117b8607054248a07c0',
+        'md5': 'fcba2636572895aba116171a899a5658',
         'info_dict': {
             'id': '6538',
             'display_id': 'Educational-xxx-video',
-            'ext': 'mp4',
+            'ext': 'flv',
             'title': 'Educational xxx video',
             'description': 'md5:b4fab8f88a8621c8fabd361a173fe5b8',
             'thumbnail': 're:https?://.*\.jpg$',
index 63b5d5924fe3d88c95d1f9537fba07044d45e124..bb8b8e23424e7943f2133028aca187d4fcffeab9 100644 (file)
@@ -65,6 +65,9 @@ class TudouIE(InfoExtractor):
         if quality:
             info_url += '&hd' + quality
         xml_data = self._download_xml(info_url, video_id, 'Opening the info XML page')
+        error = xml_data.attrib.get('error')
+        if error is not None:
+            raise ExtractorError('Tudou said: %s' % error, expected=True)
         final_url = xml_data.text
         return final_url
 
index dc3a8334a6b335143dff417d805a26df412d8783..f3aba3530614929f1eddd2e0c137006edbc4fb70 100644 (file)
@@ -58,7 +58,9 @@ class TvigleIE(InfoExtractor):
         if not video_id:
             webpage = self._download_webpage(url, display_id)
             video_id = self._html_search_regex(
-                r'class="video-preview current_playing" id="(\d+)">',
+                (r'<div[^>]+class=["\']player["\'][^>]+id=["\'](\d+)',
+                 r'var\s+cloudId\s*=\s*["\'](\d+)',
+                 r'class="video-preview current_playing" id="(\d+)"'),
                 webpage, 'video id')
 
         video_data = self._download_json(
index 1f32ea2ebe2c917af0e645799c88b474542249bb..ea673054fdc7135a203cca8db00dc128344b0829 100644 (file)
@@ -260,6 +260,17 @@ class TwitterIE(InfoExtractor):
             'upload_date': '20140615',
         },
         'add_ie': ['Vine'],
+    }, {
+        'url': 'https://twitter.com/captainamerica/status/719944021058060289',
+        # md5 constantly changes
+        'info_dict': {
+            'id': '719944021058060289',
+            'ext': 'mp4',
+            'title': 'Captain America - @King0fNerd Are you sure you made the right choice? Find out in theaters.',
+            'description': 'Captain America on Twitter: "@King0fNerd Are you sure you made the right choice? Find out in theaters. https://t.co/GpgYi9xMJI"',
+            'uploader_id': 'captainamerica',
+            'uploader': 'Captain America',
+        },
     }]
 
     def _real_extract(self, url):
@@ -284,17 +295,6 @@ class TwitterIE(InfoExtractor):
             'title': username + ' - ' + title,
         }
 
-        card_id = self._search_regex(
-            r'["\']/i/cards/tfw/v1/(\d+)', webpage, 'twitter card url', default=None)
-        if card_id:
-            card_url = 'https://twitter.com/i/cards/tfw/v1/' + card_id
-            info.update({
-                '_type': 'url_transparent',
-                'ie_key': 'TwitterCard',
-                'url': card_url,
-            })
-            return info
-
         mobj = re.search(r'''(?x)
             <video[^>]+class="animated-gif"(?P<more_info>[^>]+)>\s*
                 <source[^>]+video-src="(?P<url>[^"]+)"
diff --git a/youtube_dl/extractor/ubu.py b/youtube_dl/extractor/ubu.py
deleted file mode 100644 (file)
index 1d52cbc..0000000
+++ /dev/null
@@ -1,57 +0,0 @@
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..utils import (
-    int_or_none,
-    qualities,
-)
-
-
-class UbuIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?ubu\.com/film/(?P<id>[\da-z_-]+)\.html'
-    _TEST = {
-        'url': 'http://ubu.com/film/her_noise.html',
-        'md5': '138d5652618bf0f03878978db9bef1ee',
-        'info_dict': {
-            'id': 'her_noise',
-            'ext': 'm4v',
-            'title': 'Her Noise - The Making Of (2007)',
-            'duration': 3600,
-        },
-    }
-
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-        webpage = self._download_webpage(url, video_id)
-
-        title = self._html_search_regex(
-            r'<title>.+?Film &amp; Video: ([^<]+)</title>', webpage, 'title')
-
-        duration = int_or_none(self._html_search_regex(
-            r'Duration: (\d+) minutes', webpage, 'duration', fatal=False),
-            invscale=60)
-
-        formats = []
-        FORMAT_REGEXES = [
-            ('sq', r"'flashvars'\s*,\s*'file=([^']+)'"),
-            ('hq', r'href="(http://ubumexico\.centro\.org\.mx/video/[^"]+)"'),
-        ]
-        preference = qualities([fid for fid, _ in FORMAT_REGEXES])
-        for format_id, format_regex in FORMAT_REGEXES:
-            m = re.search(format_regex, webpage)
-            if m:
-                formats.append({
-                    'url': m.group(1),
-                    'format_id': format_id,
-                    'preference': preference(format_id),
-                })
-        self._sort_formats(formats)
-
-        return {
-            'id': video_id,
-            'title': title,
-            'duration': duration,
-            'formats': formats,
-        }
index b5fe753d7115923d16ed6cf7de34c8723368f82f..54605d863027968a4a15c5358b9f98539c69c4b3 100644 (file)
@@ -41,6 +41,12 @@ class UstreamIE(InfoExtractor):
             'uploader': 'sportscanadatv',
         },
         'skip': 'This Pro Broadcaster has chosen to remove this video from the ustream.tv site.',
+    }, {
+        'url': 'http://www.ustream.tv/embed/10299409',
+        'info_dict': {
+            'id': '10299409',
+        },
+        'playlist_count': 3,
     }]
 
     def _real_extract(self, url):
@@ -55,10 +61,12 @@ class UstreamIE(InfoExtractor):
         if m.group('type') == 'embed':
             video_id = m.group('id')
             webpage = self._download_webpage(url, video_id)
-            desktop_video_id = self._html_search_regex(
-                r'ContentVideoIds=\["([^"]*?)"\]', webpage, 'desktop_video_id')
-            desktop_url = 'http://www.ustream.tv/recorded/' + desktop_video_id
-            return self.url_result(desktop_url, 'Ustream')
+            content_video_ids = self._parse_json(self._search_regex(
+                r'ustream\.vars\.offAirContentVideoIds=([^;]+);', webpage,
+                'content video IDs'), video_id)
+            return self.playlist_result(
+                map(lambda u: self.url_result('http://www.ustream.tv/recorded/' + u, 'Ustream'), content_video_ids),
+                video_id)
 
         params = self._download_json(
             'https://api.ustream.tv/videos/%s.json' % video_id, video_id)
index 9369abaf8f7bdfa2b220c39d02f9460dbab711c2..84698371a8ab2daf77faae1684141eb32425f232 100644 (file)
@@ -2,11 +2,19 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
+from ..compat import (
+    compat_urllib_parse_urlparse,
+    compat_parse_qs,
+)
+from ..utils import (
+    clean_html,
+    remove_start,
+)
 
 
 class Varzesh3IE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?video\.varzesh3\.com/(?:[^/]+/)+(?P<id>[^/]+)/?'
-    _TEST = {
+    _TESTS = [{
         'url': 'http://video.varzesh3.com/germany/bundesliga/5-%D9%88%D8%A7%DA%A9%D9%86%D8%B4-%D8%A8%D8%B1%D8%AA%D8%B1-%D8%AF%D8%B1%D9%88%D8%A7%D8%B2%D9%87%E2%80%8C%D8%A8%D8%A7%D9%86%D8%A7%D9%86%D8%9B%D9%87%D9%81%D8%AA%D9%87-26-%D8%A8%D9%88%D9%86%D8%AF%D8%B3/',
         'md5': '2a933874cb7dce4366075281eb49e855',
         'info_dict': {
@@ -15,8 +23,19 @@ class Varzesh3IE(InfoExtractor):
             'title': '۵ واکنش برتر دروازه‌بانان؛هفته ۲۶ بوندسلیگا',
             'description': 'فصل ۲۰۱۵-۲۰۱۴',
             'thumbnail': 're:^https?://.*\.jpg$',
-        }
-    }
+        },
+        'skip': 'HTTP 404 Error',
+    }, {
+        'url': 'http://video.varzesh3.com/video/112785/%D8%AF%D9%84%D9%87-%D8%B9%D9%84%DB%8C%D8%9B-%D8%B3%D8%AA%D8%A7%D8%B1%D9%87-%D9%86%D9%88%D8%B8%D9%87%D9%88%D8%B1-%D9%84%DB%8C%DA%AF-%D8%A8%D8%B1%D8%AA%D8%B1-%D8%AC%D8%B2%DB%8C%D8%B1%D9%87',
+        'md5': '841b7cd3afbc76e61708d94e53a4a4e7',
+        'info_dict': {
+            'id': '112785',
+            'ext': 'mp4',
+            'title': 'دله علی؛ ستاره نوظهور لیگ برتر جزیره',
+            'description': 'فوتبال 120',
+        },
+        'expected_warnings': ['description'],
+    }]
 
     def _real_extract(self, url):
         display_id = self._match_id(url)
@@ -26,15 +45,30 @@ class Varzesh3IE(InfoExtractor):
         video_url = self._search_regex(
             r'<source[^>]+src="([^"]+)"', webpage, 'video url')
 
-        title = self._og_search_title(webpage)
+        title = remove_start(self._html_search_regex(
+            r'<title>([^<]+)</title>', webpage, 'title'), 'ویدیو ورزش 3 | ')
+
         description = self._html_search_regex(
             r'(?s)<div class="matn">(.+?)</div>',
-            webpage, 'description', fatal=False)
-        thumbnail = self._og_search_thumbnail(webpage)
+            webpage, 'description', default=None)
+        if description is None:
+            description = clean_html(self._html_search_meta('description', webpage))
+
+        thumbnail = self._og_search_thumbnail(webpage, default=None)
+        if thumbnail is None:
+            fb_sharer_url = self._search_regex(
+                r'<a[^>]+href="(https?://www\.facebook\.com/sharer/sharer\.php?[^"]+)"',
+                webpage, 'facebook sharer URL', fatal=False)
+            sharer_params = compat_parse_qs(compat_urllib_parse_urlparse(fb_sharer_url).query)
+            thumbnail = sharer_params.get('p[images][0]', [None])[0]
 
         video_id = self._search_regex(
             r"<link[^>]+rel='(?:canonical|shortlink)'[^>]+href='/\?p=([^']+)'",
-            webpage, display_id, default=display_id)
+            webpage, display_id, default=None)
+        if video_id is None:
+            video_id = self._search_regex(
+                'var\s+VideoId\s*=\s*(\d+);', webpage, 'video id',
+                default=display_id)
 
         return {
             'url': video_url,
index 46c785ae183d72207ab12500618f3eb7b765373d..95daf4dfdf2155dbbab26f2896cf3c42e0f33e2f 100644 (file)
@@ -3,7 +3,6 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from .ooyala import OoyalaIE
 from ..utils import ExtractorError
 
 
@@ -14,13 +13,21 @@ class ViceIE(InfoExtractor):
         'url': 'http://www.vice.com/video/cowboy-capitalists-part-1',
         'info_dict': {
             'id': '43cW1mYzpia9IlestBjVpd23Yu3afAfp',
-            'ext': 'mp4',
+            'ext': 'flv',
             'title': 'VICE_COWBOYCAPITALISTS_PART01_v1_VICE_WM_1080p.mov',
             'duration': 725.983,
         },
-        'params': {
-            # Requires ffmpeg (m3u8 manifest)
-            'skip_download': True,
+    }, {
+        'url': 'http://www.vice.com/video/how-to-hack-a-car',
+        'md5': '6fb2989a3fed069fb8eab3401fc2d3c9',
+        'info_dict': {
+            'id': '3jstaBeXgAs',
+            'ext': 'mp4',
+            'title': 'How to Hack a Car: Phreaked Out (Episode 2)',
+            'description': 'md5:ee95453f7ff495db8efe14ae8bf56f30',
+            'uploader_id': 'MotherboardTV',
+            'uploader': 'Motherboard',
+            'upload_date': '20140529',
         },
     }, {
         'url': 'https://news.vice.com/video/experimenting-on-animals-inside-the-monkey-lab',
@@ -39,11 +46,14 @@ class ViceIE(InfoExtractor):
         try:
             embed_code = self._search_regex(
                 r'embedCode=([^&\'"]+)', webpage,
-                'ooyala embed code')
-            ooyala_url = OoyalaIE._url_for_embed_code(embed_code)
+                'ooyala embed code', default=None)
+            if embed_code:
+                return self.url_result('ooyala:%s' % embed_code, 'Ooyala')
+            youtube_id = self._search_regex(
+                r'data-youtube-id="([^"]+)"', webpage, 'youtube id')
+            return self.url_result(youtube_id, 'Youtube')
         except ExtractorError:
             raise ExtractorError('The page doesn\'t contain a video', expected=True)
-        return self.url_result(ooyala_url, ie='Ooyala')
 
 
 class ViceShowIE(InfoExtractor):
index 0ffc7ff7dc9185a3a3ec5c0fd14d302872662dda..2ed5d964344211c22d2260b1946273772434db8b 100644 (file)
@@ -14,8 +14,11 @@ class VideoDetectiveIE(InfoExtractor):
             'id': '194487',
             'ext': 'mp4',
             'title': 'KICK-ASS 2',
-            'description': 'md5:65ba37ad619165afac7d432eaded6013',
-            'duration': 138,
+            'description': 'md5:c189d5b7280400630a1d3dd17eaa8d8a',
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
         },
     }
 
@@ -24,4 +27,4 @@ class VideoDetectiveIE(InfoExtractor):
         webpage = self._download_webpage(url, video_id)
         og_video = self._og_search_video_url(webpage)
         query = compat_urlparse.urlparse(og_video).query
-        return self.url_result(InternetVideoArchiveIE._build_url(query), ie=InternetVideoArchiveIE.ie_key())
+        return self.url_result(InternetVideoArchiveIE._build_json_url(query), ie=InternetVideoArchiveIE.ie_key())
index 707a5735ad5463fec1d6996db4fd0b381a9205bf..a4833a8584b05e65c0058b6212fc3e30fdb16c4a 100644 (file)
@@ -81,7 +81,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
                             \.
                         )?
                         vimeo(?P<pro>pro)?\.com/
-                        (?!channels/[^/?#]+/?(?:$|[?#])|(?:album|ondemand)/)
+                        (?!channels/[^/?#]+/?(?:$|[?#])|[^/]+/review/|(?:album|ondemand)/)
                         (?:.*?/)?
                         (?:
                             (?:
@@ -90,6 +90,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
                             )?
                         (?:videos?/)?
                         (?P<id>[0-9]+)
+                        (?:/[\da-f]+)?
                         /?(?:[?&].*)?(?:[#].*)?$
                     '''
     IE_NAME = 'vimeo'
@@ -232,6 +233,10 @@ class VimeoIE(VimeoBaseInfoExtractor):
             'url': 'https://vimeo.com/7809605',
             'only_matching': True,
         },
+        {
+            'url': 'https://vimeo.com/160743502/abd0e13fb4',
+            'only_matching': True,
+        }
     ]
 
     @staticmethod
diff --git a/youtube_dl/extractor/wayofthemaster.py b/youtube_dl/extractor/wayofthemaster.py
deleted file mode 100644 (file)
index af7bb8b..0000000
+++ /dev/null
@@ -1,52 +0,0 @@
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-
-
-class WayOfTheMasterIE(InfoExtractor):
-    _VALID_URL = r'https?://www\.wayofthemaster\.com/([^/?#]*/)*(?P<id>[^/?#]+)\.s?html(?:$|[?#])'
-
-    _TEST = {
-        'url': 'http://www.wayofthemaster.com/hbks.shtml',
-        'md5': '5316b57487ada8480606a93cb3d18d24',
-        'info_dict': {
-            'id': 'hbks',
-            'ext': 'mp4',
-            'title': 'Intelligent Design vs. Evolution',
-        },
-    }
-
-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-
-        webpage = self._download_webpage(url, video_id)
-
-        title = self._search_regex(
-            r'<img src="images/title_[^"]+".*?alt="([^"]+)"',
-            webpage, 'title', default=None)
-        if title is None:
-            title = self._html_search_regex(
-                r'<title>(.*?)</title>', webpage, 'page title')
-
-        url_base = self._search_regex(
-            r'<param\s+name="?movie"?\s+value=".*?/wotm_videoplayer_highlow[0-9]*\.swf\?vid=([^"]+)"',
-            webpage, 'URL base')
-        formats = [{
-            'format_id': 'low',
-            'quality': 1,
-            'url': url_base + '_low.mp4',
-        }, {
-            'format_id': 'high',
-            'quality': 2,
-            'url': url_base + '_high.mp4',
-        }]
-        self._sort_formats(formats)
-
-        return {
-            'id': video_id,
-            'title': title,
-            'formats': formats,
-        }
index 236ff403bd08f941a2eb023cd41c3bb21c49d4c3..b113ab1c4891fdf96898d359cf38779d61b394f8 100644 (file)
@@ -12,7 +12,7 @@ from ..utils import (
 class XboxClipsIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?xboxclips\.com/(?:video\.php\?.*vid=|[^/]+/)(?P<id>[\w-]{36})'
     _TEST = {
-        'url': 'https://xboxclips.com/video.php?uid=2533274823424419&gamertag=Iabdulelah&vid=074a69a9-5faf-46aa-b93b-9909c1720325',
+        'url': 'http://xboxclips.com/video.php?uid=2533274823424419&gamertag=Iabdulelah&vid=074a69a9-5faf-46aa-b93b-9909c1720325',
         'md5': 'fbe1ec805e920aeb8eced3c3e657df5d',
         'info_dict': {
             'id': '074a69a9-5faf-46aa-b93b-9909c1720325',
index 188066561c3ebd571b5ff9d6afd4400bd3ef2f34..44f98d294909a75f44f9c01e3a2ce0e7c66d86b5 100644 (file)
@@ -125,6 +125,12 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
         if login_results is False:
             return False
 
+        error_msg = self._html_search_regex(
+            r'<[^>]+id="errormsg_0_Passwd"[^>]*>([^<]+)<',
+            login_results, 'error message', default=None)
+        if error_msg:
+            raise ExtractorError('Unable to login: %s' % error_msg, expected=True)
+
         if re.search(r'id="errormsg_0_Passwd"', login_results) is not None:
             raise ExtractorError('Please use your account password and a two-factor code instead of an application-specific password.', expected=True)
 
@@ -1818,20 +1824,32 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
     def _extract_mix(self, playlist_id):
         # The mixes are generated from a single video
         # the id of the playlist is just 'RD' + video_id
-        url = 'https://youtube.com/watch?v=%s&list=%s' % (playlist_id[-11:], playlist_id)
-        webpage = self._download_webpage(
-            url, playlist_id, 'Downloading Youtube mix')
+        ids = []
+        last_id = playlist_id[-11:]
+        for n in itertools.count(1):
+            url = 'https://youtube.com/watch?v=%s&list=%s' % (last_id, playlist_id)
+            webpage = self._download_webpage(
+                url, playlist_id, 'Downloading page {0} of Youtube mix'.format(n))
+            new_ids = orderedSet(re.findall(
+                r'''(?xs)data-video-username=".*?".*?
+                           href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id),
+                webpage))
+            # Fetch new pages until all the videos are repeated, it seems that
+            # there are always 51 unique videos.
+            new_ids = [_id for _id in new_ids if _id not in ids]
+            if not new_ids:
+                break
+            ids.extend(new_ids)
+            last_id = ids[-1]
+
+        url_results = self._ids_to_results(ids)
+
         search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage)
         title_span = (
             search_title('playlist-title') or
             search_title('title long-title') or
             search_title('title'))
         title = clean_html(title_span)
-        ids = orderedSet(re.findall(
-            r'''(?xs)data-video-username=".*?".*?
-                       href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id),
-            webpage))
-        url_results = self._ids_to_results(ids)
 
         return self.playlist_result(url_results, playlist_id, title)
 
@@ -1884,7 +1902,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
         if video:
             return video
 
-        if playlist_id.startswith('RD') or playlist_id.startswith('UL'):
+        if playlist_id.startswith(('RD', 'UL', 'PU')):
             # Mixes require a custom extraction process
             return self._extract_mix(playlist_id)
 
@@ -1987,8 +2005,8 @@ class YoutubeUserIE(YoutubeChannelIE):
     def suitable(cls, url):
         # Don't return True if the url can be extracted with another youtube
         # extractor; the regex is too permissive and would match it.
-        other_ies = iter(klass for (name, klass) in globals().items() if name.endswith('IE') and klass is not cls)
-        if any(ie.suitable(url) for ie in other_ies):
+        other_yt_ies = iter(klass for (name, klass) in globals().items() if name.startswith('Youtube') and name.endswith('IE') and klass is not cls)
+        if any(ie.suitable(url) for ie in other_yt_ies):
             return False
         else:
             return super(YoutubeUserIE, cls).suitable(url)
index b64cd396bca6f2ec4457e64a294ec7bc9208bbf1..1793a878cb57b60ab7e59cb55f17eb4127c32f92 100644 (file)
@@ -175,7 +175,8 @@ class FFmpegPostProcessor(PostProcessor):
         # Always use 'file:' because the filename may contain ':' (ffmpeg
         # interprets that as a protocol) or can start with '-' (-- is broken in
         # ffmpeg, see https://ffmpeg.org/trac/ffmpeg/ticket/2127 for details)
-        return 'file:' + fn
+        # Also leave '-' intact in order not to break streaming to stdout.
+        return 'file:' + fn if fn != '-' else fn
 
 
 class FFmpegExtractAudioPP(FFmpegPostProcessor):
index c91aa068217539558e1f32599fa0d5e5b3d6bf60..f333e471275a69cbd158828c90f0ed1b5522582f 100644 (file)
@@ -1794,6 +1794,8 @@ def urlencode_postdata(*args, **kargs):
 
 
 def update_url_query(url, query):
+    if not query:
+        return url
     parsed_url = compat_urlparse.urlparse(url)
     qs = compat_parse_qs(parsed_url.query)
     qs.update(query)
index 167b16e24c874b524e2a78e1d6844657c0b6c7e0..140a67847df1bce886625ea7db1e0694c072df4c 100644 (file)
@@ -1,3 +1,3 @@
 from __future__ import unicode_literals
 
-__version__ = '2016.04.06'
+__version__ = '2016.04.19'