Merge remote-tracking branch 'liudongmiao/patch-subtitle'
author Philipp Hagemeister <phihag@phihag.de>
Fri, 22 Aug 2014 00:45:21 +0000 (02:45 +0200)
committer Philipp Hagemeister <phihag@phihag.de>
Fri, 22 Aug 2014 00:45:21 +0000 (02:45 +0200)
66 files changed:
Makefile
README.md
test/helper.py
test/test_all_urls.py
test/test_download.py
test/test_playlists.py
test/test_utils.py
test/test_youtube_signature.py
youtube_dl/YoutubeDL.py
youtube_dl/__init__.py
youtube_dl/downloader/common.py
youtube_dl/downloader/f4m.py
youtube_dl/extractor/__init__.py
youtube_dl/extractor/abc.py [new file with mode: 0644]
youtube_dl/extractor/aparat.py
youtube_dl/extractor/appletrailers.py
youtube_dl/extractor/ard.py
youtube_dl/extractor/arte.py
youtube_dl/extractor/blinkx.py
youtube_dl/extractor/bloomberg.py
youtube_dl/extractor/br.py
youtube_dl/extractor/common.py
youtube_dl/extractor/dfb.py
youtube_dl/extractor/dump.py [new file with mode: 0644]
youtube_dl/extractor/ellentv.py [new file with mode: 0644]
youtube_dl/extractor/escapist.py
youtube_dl/extractor/facebook.py
youtube_dl/extractor/firedrive.py
youtube_dl/extractor/francetv.py
youtube_dl/extractor/gamestar.py [new file with mode: 0644]
youtube_dl/extractor/gdcvault.py
youtube_dl/extractor/generic.py
youtube_dl/extractor/godtube.py [new file with mode: 0644]
youtube_dl/extractor/howstuffworks.py [new file with mode: 0644]
youtube_dl/extractor/izlesene.py [new file with mode: 0644]
youtube_dl/extractor/jove.py [new file with mode: 0644]
youtube_dl/extractor/justintv.py
youtube_dl/extractor/krasview.py [new file with mode: 0644]
youtube_dl/extractor/livestream.py
youtube_dl/extractor/metacafe.py
youtube_dl/extractor/mitele.py [new file with mode: 0644]
youtube_dl/extractor/mojvideo.py [new file with mode: 0644]
youtube_dl/extractor/nowness.py
youtube_dl/extractor/oe1.py [deleted file]
youtube_dl/extractor/ooyala.py
youtube_dl/extractor/orf.py
youtube_dl/extractor/patreon.py [new file with mode: 0644]
youtube_dl/extractor/pbs.py
youtube_dl/extractor/reverbnation.py
youtube_dl/extractor/rtlnl.py [new file with mode: 0644]
youtube_dl/extractor/shared.py [new file with mode: 0644]
youtube_dl/extractor/streamcloud.py
youtube_dl/extractor/swrmediathek.py
youtube_dl/extractor/teamcoco.py
youtube_dl/extractor/tvplay.py [new file with mode: 0644]
youtube_dl/extractor/ubu.py [new file with mode: 0644]
youtube_dl/extractor/vevo.py
youtube_dl/extractor/vidme.py [new file with mode: 0644]
youtube_dl/extractor/vimeo.py
youtube_dl/extractor/vube.py
youtube_dl/extractor/xboxclips.py [new file with mode: 0644]
youtube_dl/extractor/yahoo.py
youtube_dl/extractor/youtube.py
youtube_dl/jsinterp.py
youtube_dl/utils.py
youtube_dl/version.py

index c079761efa9b2e60887575f4cd7626d0abe469a2..088a9320bddfd367babd928bc96c71f3eaa4d9de 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -6,10 +6,10 @@ clean:
 cleanall: clean
        rm -f youtube-dl youtube-dl.exe
 
-PREFIX=/usr/local
-BINDIR=$(PREFIX)/bin
-MANDIR=$(PREFIX)/man
-PYTHON=/usr/bin/env python
+PREFIX ?= /usr/local
+BINDIR ?= $(PREFIX)/bin
+MANDIR ?= $(PREFIX)/man
+PYTHON ?= /usr/bin/env python
 
 # set SYSCONFDIR to /etc if PREFIX=/usr or PREFIX=/usr/local
 ifeq ($(PREFIX),/usr)
index fb2f776c9a9395a5aa9629ddcc347ebe95d74804..ba350b90589935d5f19a6b0f1282fe26ac206e4c 100644 (file)
--- a/README.md
+++ b/README.md
@@ -17,6 +17,14 @@ If you do not have curl, you can alternatively use a recent wget:
 
 Windows users can [download a .exe file](https://yt-dl.org/latest/youtube-dl.exe) and place it in their home directory or any other location on their [PATH](http://en.wikipedia.org/wiki/PATH_%28variable%29).
 
+OS X users can install **youtube-dl** with [Homebrew](http://brew.sh/).
+
+    brew install youtube-dl
+
+You can also use pip:
+
+    sudo pip install youtube-dl
+
 Alternatively, refer to the developer instructions below for how to check out and work with the git repository. For further options, including PGP signatures, see https://rg3.github.io/youtube-dl/download.html .
 
 # DESCRIPTION
@@ -38,12 +46,6 @@ which means you can modify it, redistribute it or use it however you like.
                                      playlist or the command line) if an error
                                      occurs
     --dump-user-agent                display the current browser identification
-    --user-agent UA                  specify a custom user agent
-    --referer REF                    specify a custom referer, use if the video
-                                     access is restricted to one domain
-    --add-header FIELD:VALUE         specify a custom HTTP header and its value,
-                                     separated by a colon ':'. You can use this
-                                     option multiple times
     --list-extractors                List all supported extractors and the URLs
                                      they would handle
     --extractor-descriptions         Output descriptions of all supported
@@ -51,35 +53,22 @@ which means you can modify it, redistribute it or use it however you like.
     --proxy URL                      Use the specified HTTP/HTTPS proxy. Pass in
                                      an empty string (--proxy "") for direct
                                      connection
-    --no-check-certificate           Suppress HTTPS certificate validation.
-    --prefer-insecure                Use an unencrypted connection to retrieve
-                                     information about the video. (Currently
-                                     supported only for YouTube)
-    --cache-dir DIR                  Location in the filesystem where youtube-dl
-                                     can store some downloaded information
-                                     permanently. By default $XDG_CACHE_HOME
-                                     /youtube-dl or ~/.cache/youtube-dl . At the
-                                     moment, only YouTube player files (for
-                                     videos with obfuscated signatures) are
-                                     cached, but that may change.
-    --no-cache-dir                   Disable filesystem caching
     --socket-timeout None            Time to wait before giving up, in seconds
-    --bidi-workaround                Work around terminals that lack
-                                     bidirectional text support. Requires bidiv
-                                     or fribidi executable in PATH
     --default-search PREFIX          Use this prefix for unqualified URLs. For
                                      example "gvsearch2:" downloads two videos
                                      from google videos for  youtube-dl "large
                                      apple". Use the value "auto" to let
-                                     youtube-dl guess. The default value "error"
-                                     just throws an error.
+                                     youtube-dl guess ("auto_warning" to emit a
+                                     warning when guessing). "error" just throws
+                                     an error. The default value "fixup_error"
+                                     repairs broken URLs, but emits an error if
+                                     this is not possible instead of searching.
     --ignore-config                  Do not read configuration files. When given
                                      in the global configuration file /etc
                                      /youtube-dl.conf: do not read the user
                                      configuration in ~/.config/youtube-dl.conf
                                      (%APPDATA%/youtube-dl/config.txt on
                                      Windows)
-    --encoding ENCODING              Force the specified encoding (experimental)
 
 ## Video Selection:
     --playlist-start NUMBER          playlist video to start at (default is 1)
@@ -125,9 +114,9 @@ which means you can modify it, redistribute it or use it however you like.
                                      of SIZE.
 
 ## Filesystem Options:
-    -t, --title                      use title in file name (default)
+    -a, --batch-file FILE            file containing URLs to download ('-' for
+                                     stdin)
     --id                             use only video ID in file name
-    -l, --literal                    [deprecated] alias of --title
     -A, --auto-number                number downloaded files starting from 00000
     -o, --output TEMPLATE            output filename template. Use %(title)s to
                                      get the title, %(uploader)s for the
@@ -160,18 +149,15 @@ which means you can modify it, redistribute it or use it however you like.
     --restrict-filenames             Restrict filenames to only ASCII
                                      characters, and avoid "&" and spaces in
                                      filenames
-    -a, --batch-file FILE            file containing URLs to download ('-' for
-                                     stdin)
-    --load-info FILE                 json file containing the video information
-                                     (created with the "--write-json" option)
+    -t, --title                      [deprecated] use title in file name
+                                     (default)
+    -l, --literal                    [deprecated] alias of --title
     -w, --no-overwrites              do not overwrite files
     -c, --continue                   force resume of partially downloaded files.
                                      By default, youtube-dl will resume
                                      downloads if possible.
     --no-continue                    do not resume partially downloaded files
                                      (restart from beginning)
-    --cookies FILE                   file to read cookies from and dump cookie
-                                     jar in
     --no-part                        do not use .part files
     --no-mtime                       do not use the Last-modified header to set
                                      the file modification time
@@ -181,6 +167,19 @@ which means you can modify it, redistribute it or use it however you like.
     --write-annotations              write video annotations to a .annotation
                                      file
     --write-thumbnail                write thumbnail image to disk
+    --load-info FILE                 json file containing the video information
+                                     (created with the "--write-json" option)
+    --cookies FILE                   file to read cookies from and dump cookie
+                                     jar in
+    --cache-dir DIR                  Location in the filesystem where youtube-dl
+                                     can store some downloaded information
+                                     permanently. By default $XDG_CACHE_HOME
+                                     /youtube-dl or ~/.cache/youtube-dl . At the
+                                     moment, only YouTube player files (for
+                                     videos with obfuscated signatures) are
+                                     cached, but that may change.
+    --no-cache-dir                   Disable filesystem caching
+    --rm-cache-dir                   Delete all filesystem cache files
 
 ## Verbosity / Simulation Options:
     -q, --quiet                      activates quiet mode
@@ -210,6 +209,22 @@ which means you can modify it, redistribute it or use it however you like.
                                      problems
     --print-traffic                  Display sent and read HTTP traffic
 
+## Workarounds:
+    --encoding ENCODING              Force the specified encoding (experimental)
+    --no-check-certificate           Suppress HTTPS certificate validation.
+    --prefer-insecure                Use an unencrypted connection to retrieve
+                                     information about the video. (Currently
+                                     supported only for YouTube)
+    --user-agent UA                  specify a custom user agent
+    --referer REF                    specify a custom referer, use if the video
+                                     access is restricted to one domain
+    --add-header FIELD:VALUE         specify a custom HTTP header and its value,
+                                     separated by a colon ':'. You can use this
+                                     option multiple times
+    --bidi-workaround                Work around terminals that lack
+                                     bidirectional text support. Requires bidiv
+                                     or fribidi executable in PATH
+
 ## Video Format Options:
     -f, --format FORMAT              video format code, specify the order of
                                      preference using slashes: "-f 22/17/18".
@@ -296,10 +311,12 @@ The current default template is `%(title)s-%(id)s.%(ext)s`.
 
 In some cases, you don't want special characters such as 中, spaces, or &, such as when transferring the downloaded filename to a Windows system or the filename through an 8bit-unsafe channel. In these cases, add the `--restrict-filenames` flag to get a shorter title:
 
-    $ youtube-dl --get-filename -o "%(title)s.%(ext)s" BaW_jenozKc
-    youtube-dl test video ''_ä↭𝕐.mp4    # All kinds of weird characters
-    $ youtube-dl --get-filename -o "%(title)s.%(ext)s" BaW_jenozKc --restrict-filenames
-    youtube-dl_test_video_.mp4          # A simple file name
+```bash
+$ youtube-dl --get-filename -o "%(title)s.%(ext)s" BaW_jenozKc
+youtube-dl test video ''_ä↭𝕐.mp4    # All kinds of weird characters
+$ youtube-dl --get-filename -o "%(title)s.%(ext)s" BaW_jenozKc --restrict-filenames
+youtube-dl_test_video_.mp4          # A simple file name
+```
 
 # VIDEO SELECTION
 
@@ -310,14 +327,16 @@ Videos can be filtered by their upload date using the options `--date`, `--dateb
  
 Examples:
 
-    # Download only the videos uploaded in the last 6 months
-    $ youtube-dl --dateafter now-6months
+```bash
+# Download only the videos uploaded in the last 6 months
+$ youtube-dl --dateafter now-6months
 
-    # Download only the videos uploaded on January 1, 1970
-    $ youtube-dl --date 19700101
+# Download only the videos uploaded on January 1, 1970
+$ youtube-dl --date 19700101
 
-    $ # will only download the videos uploaded in the 200x decade
-    $ youtube-dl --dateafter 20000101 --datebefore 20091231
+$ # will only download the videos uploaded in the 200x decade
+$ youtube-dl --dateafter 20000101 --datebefore 20091231
+```
 
 # FAQ
 
@@ -392,49 +411,48 @@ If you want to add support for a new site, you can follow this quick list (assum
 2. Check out the source code with `git clone git@github.com:YOUR_GITHUB_USERNAME/youtube-dl.git`
 3. Start a new git branch with `cd youtube-dl; git checkout -b yourextractor`
 4. Start with this simple template and save it to `youtube_dl/extractor/yourextractor.py`:
-
-        # coding: utf-8
-        from __future__ import unicode_literals
-
-        import re
-
-        from .common import InfoExtractor
-        
-        
-        class YourExtractorIE(InfoExtractor):
-            _VALID_URL = r'https?://(?:www\.)?yourextractor\.com/watch/(?P<id>[0-9]+)'
-            _TEST = {
-                'url': 'http://yourextractor.com/watch/42',
-                'md5': 'TODO: md5 sum of the first 10KiB of the video file',
-                'info_dict': {
-                    'id': '42',
-                    'ext': 'mp4',
-                    'title': 'Video title goes here',
-                    # TODO more properties, either as:
-                    # * A value
-                    # * MD5 checksum; start the string with md5:
-                    # * A regular expression; start the string with re:
-                    # * Any Python type (for example int or float)
-                }
+    ```python
+    # coding: utf-8
+    from __future__ import unicode_literals
+
+    import re
+
+    from .common import InfoExtractor
+
+
+    class YourExtractorIE(InfoExtractor):
+        _VALID_URL = r'https?://(?:www\.)?yourextractor\.com/watch/(?P<id>[0-9]+)'
+        _TEST = {
+            'url': 'http://yourextractor.com/watch/42',
+            'md5': 'TODO: md5 sum of the first 10KiB of the video file',
+            'info_dict': {
+                'id': '42',
+                'ext': 'mp4',
+                'title': 'Video title goes here',
+                # TODO more properties, either as:
+                # * A value
+                # * MD5 checksum; start the string with md5:
+                # * A regular expression; start the string with re:
+                # * Any Python type (for example int or float)
             }
+        }
 
-            def _real_extract(self, url):
-                mobj = re.match(self._VALID_URL, url)
-                video_id = mobj.group('id')
-
-                # TODO more code goes here, for example ...
-                webpage = self._download_webpage(url, video_id)
-                title = self._html_search_regex(r'<h1>(.*?)</h1>', webpage, 'title')
-
-                return {
-                    'id': video_id,
-                    'title': title,
-                    # TODO more properties (see youtube_dl/extractor/common.py)
-                }
+        def _real_extract(self, url):
+            mobj = re.match(self._VALID_URL, url)
+            video_id = mobj.group('id')
 
+            # TODO more code goes here, for example ...
+            webpage = self._download_webpage(url, video_id)
+            title = self._html_search_regex(r'<h1>(.*?)</h1>', webpage, 'title')
 
+            return {
+                'id': video_id,
+                'title': title,
+                # TODO more properties (see youtube_dl/extractor/common.py)
+            }
+    ```
 5. Add an import in [`youtube_dl/extractor/__init__.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/__init__.py).
-6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done.
+6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc.
 7. Have a look at [`youtube_dl/common/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L38). Add tests and code for as many as you want.
 8. If you can, check the code with [pyflakes](https://pypi.python.org/pypi/pyflakes) (a good idea) and [pep8](https://pypi.python.org/pypi/pep8) (optional, ignore E501).
 9. When the tests pass, [add](https://www.kernel.org/pub/software/scm/git/docs/git-add.html) the new files and [commit](https://www.kernel.org/pub/software/scm/git/docs/git-commit.html) them and [push](https://www.kernel.org/pub/software/scm/git/docs/git-push.html) the result, like this:
index b7299fb82c2e541fc520ba11c5c52d9edcc972e3..22d7638606841bc0250665402e09fb49655e5d89 100644 (file)
@@ -117,8 +117,9 @@ def expect_info_dict(self, expected_dict, got_dict):
                 u'invalid value for field %s, expected %r, got %r' % (info_field, expected, got))
 
     # Check for the presence of mandatory fields
-    for key in ('id', 'url', 'title', 'ext'):
-        self.assertTrue(got_dict.get(key), 'Missing mandatory field %s' % key)
+    if got_dict.get('_type') != 'playlist':
+        for key in ('id', 'url', 'title', 'ext'):
+            self.assertTrue(got_dict.get(key), 'Missing mandatory field %s' % key)
     # Check for mandatory fields that are automatically set by YoutubeDL
     for key in ['webpage_url', 'extractor', 'extractor_key']:
         self.assertTrue(got_dict.get(key), u'Missing field: %s' % key)
index 0ff47cf1ead4a2c89aa24a83023a1d29cde31717..b1ad30bf10ad08a19e14658a8299c006f54c03fa 100644 (file)
@@ -99,6 +99,7 @@ class TestAllURLsMatching(unittest.TestCase):
 
     def test_facebook_matching(self):
         self.assertTrue(FacebookIE.suitable('https://www.facebook.com/Shiniknoh#!/photo.php?v=10153317450565268'))
+        self.assertTrue(FacebookIE.suitable('https://www.facebook.com/cindyweather?fref=ts#!/photo.php?v=10152183998945793'))
 
     def test_no_duplicates(self):
         ies = gen_extractors()
index d6540588c130f6bafacd4ef7d077e6debf8d911d..c8d4ec2c87c97773d60c52ffb342809b8e2a0ffb 100644 (file)
@@ -63,15 +63,21 @@ def generator(test_case):
     def test_template(self):
         ie = youtube_dl.extractor.get_info_extractor(test_case['name'])
         other_ies = [get_info_extractor(ie_key) for ie_key in test_case.get('add_ie', [])]
+        is_playlist = any(k.startswith('playlist') for k in test_case)
+        test_cases = test_case.get(
+            'playlist', [] if is_playlist else [test_case])
+
         def print_skipping(reason):
             print('Skipping %s: %s' % (test_case['name'], reason))
         if not ie.working():
             print_skipping('IE marked as not _WORKING')
             return
-        if 'playlist' not in test_case:
-            info_dict = test_case.get('info_dict', {})
-            if not test_case.get('file') and not (info_dict.get('id') and info_dict.get('ext')):
+
+        for tc in test_cases:
+            info_dict = tc.get('info_dict', {})
+            if not tc.get('file') and not (info_dict.get('id') and info_dict.get('ext')):
                 raise Exception('Test definition incorrect. The output file cannot be known. Are both \'id\' and \'ext\' keys present?')
+
         if 'skip' in test_case:
             print_skipping(test_case['skip'])
             return
@@ -81,6 +87,9 @@ def generator(test_case):
                 return
 
         params = get_params(test_case.get('params', {}))
+        if is_playlist and 'playlist' not in test_case:
+            params.setdefault('extract_flat', True)
+            params.setdefault('skip_download', True)
 
         ydl = YoutubeDL(params)
         ydl.add_default_info_extractors()
@@ -93,7 +102,6 @@ def generator(test_case):
         def get_tc_filename(tc):
             return tc.get('file') or ydl.prepare_filename(tc.get('info_dict', {}))
 
-        test_cases = test_case.get('playlist', [test_case])
         def try_rm_tcs_files():
             for tc in test_cases:
                 tc_filename = get_tc_filename(tc)
@@ -105,7 +113,10 @@ def generator(test_case):
             try_num = 1
             while True:
                 try:
-                    ydl.download([test_case['url']])
+                    # We're not using .download here since that is just a shim
+                    # for outside error handling, and returns the exit code
+                    # instead of the result dict.
+                    res_dict = ydl.extract_info(test_case['url'])
                 except (DownloadError, ExtractorError) as err:
                     # Check if the exception is not a network related one
                     if not err.exc_info[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError, compat_http_client.BadStatusLine) or (err.exc_info[0] == compat_HTTPError and err.exc_info[1].code == 503):
@@ -121,6 +132,17 @@ def generator(test_case):
                 else:
                     break
 
+            if is_playlist:
+                self.assertEqual(res_dict['_type'], 'playlist')
+                expect_info_dict(self, test_case.get('info_dict', {}), res_dict)
+            if 'playlist_mincount' in test_case:
+                self.assertGreaterEqual(
+                    len(res_dict['entries']),
+                    test_case['playlist_mincount'],
+                    'Expected at least %d in playlist %s, but got only %d' % (
+                        test_case['playlist_mincount'], test_case['url'],
+                        len(res_dict['entries'])))
+
             for tc in test_cases:
                 tc_filename = get_tc_filename(tc)
                 if not test_case.get('params', {}).get('skip_download', False):
index c221c47b99daabc668607f09744d078e726b09ff..6448fea38586970a640b33045da99be33a83add0 100644 (file)
@@ -1,6 +1,17 @@
 #!/usr/bin/env python
 # encoding: utf-8
 
+## DEPRECATED FILE!
+# Add new tests to the extractors themselves, like this:
+# _TEST = {
+#    'url': 'http://example.com/playlist/42',
+#    'playlist_mincount': 99,
+#    'info_dict': {
+#        'id': '42',
+#        'title': 'Playlist number forty-two',
+#    }
+# }
+
 from __future__ import unicode_literals
 
 # Allow direct execution
@@ -193,10 +204,10 @@ class TestPlaylists(unittest.TestCase):
     def test_bandcamp_album(self):
         dl = FakeYDL()
         ie = BandcampAlbumIE(dl)
-        result = ie.extract('http://mpallante.bandcamp.com/album/nightmare-night-ep')
+        result = ie.extract('http://nightbringer.bandcamp.com/album/hierophany-of-the-open-grave')
         self.assertIsPlaylist(result)
-        self.assertEqual(result['title'], 'Nightmare Night EP')
-        assertGreaterEqual(self, len(result['entries']), 4)
+        self.assertEqual(result['title'], 'Hierophany of the Open Grave')
+        assertGreaterEqual(self, len(result['entries']), 9)
         
     def test_smotri_community(self):
         dl = FakeYDL()
index 51eb0b6b936c7ea5d21cfef9bdc0b70f2ee7663a..e26cc5b0cc0e9df46fc00ae9084930ed384aff36 100644 (file)
@@ -280,7 +280,7 @@ class TestUtil(unittest.TestCase):
         d = json.loads(stripped)
         self.assertEqual(d, [{"id": "532cb", "x": 3}])
 
-    def test_uppercase_escpae(self):
+    def test_uppercase_escape(self):
         self.assertEqual(uppercase_escape(u'aä'), u'aä')
         self.assertEqual(uppercase_escape(u'\\U0001d550'), u'𝕐')
 
index f0f33f1db2643b6eeb1c4b82202e071c40f826fb..604e76ab60ba42081c3b4779e77e2963038f43e5 100644 (file)
@@ -1,5 +1,7 @@
 #!/usr/bin/env python
 
+from __future__ import unicode_literals
+
 # Allow direct execution
 import os
 import sys
@@ -16,52 +18,64 @@ from youtube_dl.utils import compat_str, compat_urlretrieve
 
 _TESTS = [
     (
-        u'https://s.ytimg.com/yts/jsbin/html5player-vflHOr_nV.js',
-        u'js',
+        'https://s.ytimg.com/yts/jsbin/html5player-vflHOr_nV.js',
+        'js',
         86,
-        u'>=<;:/.-[+*)(\'&%$#"!ZYX0VUTSRQPONMLKJIHGFEDCBA\\yxwvutsrqponmlkjihgfedcba987654321',
+        '>=<;:/.-[+*)(\'&%$#"!ZYX0VUTSRQPONMLKJIHGFEDCBA\\yxwvutsrqponmlkjihgfedcba987654321',
     ),
     (
-        u'https://s.ytimg.com/yts/jsbin/html5player-vfldJ8xgI.js',
-        u'js',
+        'https://s.ytimg.com/yts/jsbin/html5player-vfldJ8xgI.js',
+        'js',
         85,
-        u'3456789a0cdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRS[UVWXYZ!"#$%&\'()*+,-./:;<=>?@',
+        '3456789a0cdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRS[UVWXYZ!"#$%&\'()*+,-./:;<=>?@',
     ),
     (
-        u'https://s.ytimg.com/yts/jsbin/html5player-vfle-mVwz.js',
-        u'js',
+        'https://s.ytimg.com/yts/jsbin/html5player-vfle-mVwz.js',
+        'js',
         90,
-        u']\\[@?>=<;:/.-,+*)(\'&%$#"hZYXWVUTSRQPONMLKJIHGFEDCBAzyxwvutsrqponmlkjiagfedcb39876',
+        ']\\[@?>=<;:/.-,+*)(\'&%$#"hZYXWVUTSRQPONMLKJIHGFEDCBAzyxwvutsrqponmlkjiagfedcb39876',
     ),
     (
-        u'https://s.ytimg.com/yts/jsbin/html5player-en_US-vfl0Cbn9e.js',
-        u'js',
+        'https://s.ytimg.com/yts/jsbin/html5player-en_US-vfl0Cbn9e.js',
+        'js',
         84,
-        u'O1I3456789abcde0ghijklmnopqrstuvwxyzABCDEFGHfJKLMN2PQRSTUVW@YZ!"#$%&\'()*+,-./:;<=',
+        'O1I3456789abcde0ghijklmnopqrstuvwxyzABCDEFGHfJKLMN2PQRSTUVW@YZ!"#$%&\'()*+,-./:;<=',
     ),
     (
-        u'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflXGBaUN.js',
-        u'js',
-        u'2ACFC7A61CA478CD21425E5A57EBD73DDC78E22A.2094302436B2D377D14A3BBA23022D023B8BC25AA',
-        u'A52CB8B320D22032ABB3A41D773D2B6342034902.A22E87CDD37DBE75A5E52412DC874AC16A7CFCA2',
+        'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflXGBaUN.js',
+        'js',
+        '2ACFC7A61CA478CD21425E5A57EBD73DDC78E22A.2094302436B2D377D14A3BBA23022D023B8BC25AA',
+        'A52CB8B320D22032ABB3A41D773D2B6342034902.A22E87CDD37DBE75A5E52412DC874AC16A7CFCA2',
     ),
     (
-        u'http://s.ytimg.com/yts/swfbin/player-vfl5vIhK2/watch_as3.swf',
-        u'swf',
+        'http://s.ytimg.com/yts/swfbin/player-vfl5vIhK2/watch_as3.swf',
+        'swf',
         86,
-        u'O1I3456789abcde0ghijklmnopqrstuvwxyzABCDEFGHfJKLMN2PQRSTUVWXY\\!"#$%&\'()*+,-./:;<=>?'
+        'O1I3456789abcde0ghijklmnopqrstuvwxyzABCDEFGHfJKLMN2PQRSTUVWXY\\!"#$%&\'()*+,-./:;<=>?'
     ),
     (
-        u'http://s.ytimg.com/yts/swfbin/player-vflmDyk47/watch_as3.swf',
-        u'swf',
-        u'F375F75BF2AFDAAF2666E43868D46816F83F13E81C46.3725A8218E446A0DECD33F79DC282994D6AA92C92C9',
-        u'9C29AA6D499282CD97F33DCED0A644E8128A5273.64C18E31F38361864D86834E6662FAADFA2FB57F'
+        'http://s.ytimg.com/yts/swfbin/player-vflmDyk47/watch_as3.swf',
+        'swf',
+        'F375F75BF2AFDAAF2666E43868D46816F83F13E81C46.3725A8218E446A0DECD33F79DC282994D6AA92C92C9',
+        '9C29AA6D499282CD97F33DCED0A644E8128A5273.64C18E31F38361864D86834E6662FAADFA2FB57F'
     ),
     (
-        u'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflBb0OQx.js',
-        u'js',
+        'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflBb0OQx.js',
+        'js',
         84,
-        u'123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQ0STUVWXYZ!"#$%&\'()*+,@./:;<=>'
+        '123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQ0STUVWXYZ!"#$%&\'()*+,@./:;<=>'
+    ),
+    (
+        'https://s.ytimg.com/yts/jsbin/html5player-en_US-vfl9FYC6l.js',
+        'js',
+        83,
+        '123456789abcdefghijklmnopqr0tuvwxyzABCDETGHIJKLMNOPQRS>UVWXYZ!"#$%&\'()*+,-./:;<=F'
+    ),
+    (
+        'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflCGk6yw/html5player.js',
+        'js',
+        '4646B5181C6C3020DF1D9C7FCFEA.AD80ABF70C39BD369CCCAE780AFBB98FA6B6CB42766249D9488C288',
+        '82C8849D94266724DC6B6AF89BBFA087EACCD963.B93C07FBA084ACAEFCF7C9D1FD0203C6C1815B6B'
     )
 ]
 
@@ -75,7 +89,7 @@ class TestSignature(unittest.TestCase):
 
 
 def make_tfunc(url, stype, sig_input, expected_sig):
-    m = re.match(r'.*-([a-zA-Z0-9_-]+)(?:/watch_as3)?\.[a-z]+$', url)
+    m = re.match(r'.*-([a-zA-Z0-9_-]+)(?:/watch_as3|/html5player)?\.[a-z]+$', url)
     assert m, '%r should follow URL format' % url
     test_id = m.group(1)
 
index 4ff1ae0e8970e43994fcf5a83a2e0719a0e324fa..e7194f3e314b78bd44cb71b755e2de9c666829fe 100755 (executable)
@@ -162,6 +162,7 @@ class YoutubeDL(object):
     default_search:    Prepend this string if an input url is not valid.
                        'auto' for elaborate guessing
     encoding:          Use this encoding instead of the system-specified.
+    extract_flat:      Do not resolve URLs, return the immediate result.
 
     The following parameters are not used by YoutubeDL itself, they are used by
     the FileDownloader:
@@ -275,7 +276,7 @@ class YoutubeDL(object):
             return message
 
         assert hasattr(self, '_output_process')
-        assert type(message) == type('')
+        assert isinstance(message, compat_str)
         line_count = message.count('\n') + 1
         self._output_process.stdin.write((message + '\n').encode('utf-8'))
         self._output_process.stdin.flush()
@@ -303,7 +304,7 @@ class YoutubeDL(object):
 
     def to_stderr(self, message):
         """Print message to stderr."""
-        assert type(message) == type('')
+        assert isinstance(message, compat_str)
         if self.params.get('logger'):
             self.params['logger'].error(message)
         else:
@@ -558,7 +559,12 @@ class YoutubeDL(object):
         Returns the resolved ie_result.
         """
 
-        result_type = ie_result.get('_type', 'video') # If not given we suppose it's a video, support the default old system
+        result_type = ie_result.get('_type', 'video')
+
+        if self.params.get('extract_flat', False):
+            if result_type in ('url', 'url_transparent'):
+                return ie_result
+
         if result_type == 'video':
             self.add_extra_info(ie_result, extra_info)
             return self.process_video_result(ie_result, download=download)
@@ -849,7 +855,7 @@ class YoutubeDL(object):
         # Keep for backwards compatibility
         info_dict['stitle'] = info_dict['title']
 
-        if not 'format' in info_dict:
+        if 'format' not in info_dict:
             info_dict['format'] = info_dict['ext']
 
         reason = self._match_entry(info_dict)
@@ -1234,21 +1240,18 @@ class YoutubeDL(object):
         if not self.params.get('verbose'):
             return
 
+        if type('') is not compat_str:
+            # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
+            self.report_warning(
+                'Your Python is broken! Update to a newer and supported version')
+
         encoding_str = (
             '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
                 locale.getpreferredencoding(),
                 sys.getfilesystemencoding(),
                 sys.stdout.encoding,
                 self.get_encoding()))
-        try:
-            write_string(encoding_str, encoding=None)
-        except:
-            errmsg = 'Failed to write encoding string %r' % encoding_str
-            try:
-                sys.stdout.write(errmsg)
-            except:
-                pass
-            raise IOError(errmsg)
+        write_string(encoding_str, encoding=None)
 
         self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
         try:
index de7bc0f5fed96675f8ed84146a09e0ca306da67a..f156ba3a03ae551d54cd62fc24ddb170f23da131 100644 (file)
@@ -66,6 +66,11 @@ __authors__  = (
     'Naglis Jonaitis',
     'Charles Chen',
     'Hassaan Ali',
+    'Dobrosław Żybort',
+    'David Fabijan',
+    'Sebastian Haas',
+    'Alexander Kirk',
+    'Erik Johnson',
 )
 
 __license__ = 'Public Domain'
@@ -76,6 +81,7 @@ import optparse
 import os
 import random
 import shlex
+import shutil
 import sys
 
 
@@ -222,6 +228,7 @@ def parseOpts(overrideArguments=None):
     downloader     = optparse.OptionGroup(parser, 'Download Options')
     postproc       = optparse.OptionGroup(parser, 'Post-processing Options')
     filesystem     = optparse.OptionGroup(parser, 'Filesystem Options')
+    workarounds    = optparse.OptionGroup(parser, 'Workarounds')
     verbosity      = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')
 
     general.add_option('-h', '--help',
@@ -238,14 +245,6 @@ def parseOpts(overrideArguments=None):
     general.add_option('--dump-user-agent',
             action='store_true', dest='dump_user_agent',
             help='display the current browser identification', default=False)
-    general.add_option('--user-agent',
-            dest='user_agent', help='specify a custom user agent', metavar='UA')
-    general.add_option('--referer',
-            dest='referer', help='specify a custom referer, use if the video access is restricted to one domain',
-            metavar='REF', default=None)
-    general.add_option('--add-header',
-            dest='headers', help='specify a custom HTTP header and its value, separated by a colon \':\'. You can use this option multiple times', action="append",
-            metavar='FIELD:VALUE')
     general.add_option('--list-extractors',
             action='store_true', dest='list_extractors',
             help='List all supported extractors and the URLs they would handle', default=False)
@@ -255,33 +254,17 @@ def parseOpts(overrideArguments=None):
     general.add_option(
         '--proxy', dest='proxy', default=None, metavar='URL',
         help='Use the specified HTTP/HTTPS proxy. Pass in an empty string (--proxy "") for direct connection')
-    general.add_option('--no-check-certificate', action='store_true', dest='no_check_certificate', default=False, help='Suppress HTTPS certificate validation.')
-    general.add_option(
-        '--prefer-insecure', '--prefer-unsecure', action='store_true', dest='prefer_insecure',
-        help='Use an unencrypted connection to retrieve information about the video. (Currently supported only for YouTube)')
-    general.add_option(
-        '--cache-dir', dest='cachedir', default=get_cachedir(), metavar='DIR',
-        help='Location in the filesystem where youtube-dl can store some downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl or ~/.cache/youtube-dl . At the moment, only YouTube player files (for videos with obfuscated signatures) are cached, but that may change.')
-    general.add_option(
-        '--no-cache-dir', action='store_const', const=None, dest='cachedir',
-        help='Disable filesystem caching')
     general.add_option(
         '--socket-timeout', dest='socket_timeout',
         type=float, default=None, help=u'Time to wait before giving up, in seconds')
-    general.add_option(
-        '--bidi-workaround', dest='bidi_workaround', action='store_true',
-        help=u'Work around terminals that lack bidirectional text support. Requires bidiv or fribidi executable in PATH')
     general.add_option(
         '--default-search',
         dest='default_search', metavar='PREFIX',
-        help='Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for  youtube-dl "large apple". Use the value "auto" to let youtube-dl guess. The default value "error" just throws an error.')
+        help='Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple". Use the value "auto" to let youtube-dl guess ("auto_warning" to emit a warning when guessing). "error" just throws an error. The default value "fixup_error" repairs broken URLs, but emits an error if this is not possible instead of searching.')
     general.add_option(
         '--ignore-config',
         action='store_true',
         help='Do not read configuration files. When given in the global configuration file /etc/youtube-dl.conf: do not read the user configuration in ~/.config/youtube-dl.conf (%APPDATA%/youtube-dl/config.txt on Windows)')
-    general.add_option(
-        '--encoding', dest='encoding', metavar='ENCODING',
-        help='Force the specified encoding (experimental)')
 
     selection.add_option(
         '--playlist-start',
@@ -382,6 +365,33 @@ def parseOpts(overrideArguments=None):
             help='do not automatically adjust the buffer size. By default, the buffer size is automatically resized from an initial value of SIZE.', default=False)
     downloader.add_option('--test', action='store_true', dest='test', default=False, help=optparse.SUPPRESS_HELP)
 
+    workarounds.add_option(
+        '--encoding', dest='encoding', metavar='ENCODING',
+        help='Force the specified encoding (experimental)')
+    workarounds.add_option(
+        '--no-check-certificate', action='store_true',
+        dest='no_check_certificate', default=False,
+        help='Suppress HTTPS certificate validation.')
+    workarounds.add_option(
+        '--prefer-insecure', '--prefer-unsecure', action='store_true', dest='prefer_insecure',
+        help='Use an unencrypted connection to retrieve information about the video. (Currently supported only for YouTube)')
+    workarounds.add_option(
+        '--user-agent', metavar='UA',
+        dest='user_agent', help='specify a custom user agent')
+    workarounds.add_option(
+        '--referer', metavar='REF',
+        dest='referer', default=None,
+        help='specify a custom referer, use if the video access is restricted to one domain',
+    )
+    workarounds.add_option(
+        '--add-header', metavar='FIELD:VALUE',
+        dest='headers', action='append',
+        help='specify a custom HTTP header and its value, separated by a colon \':\'. You can use this option multiple times',
+    )
+    workarounds.add_option(
+        '--bidi-workaround', dest='bidi_workaround', action='store_true',
+        help=u'Work around terminals that lack bidirectional text support. Requires bidiv or fribidi executable in PATH')
+
     verbosity.add_option('-q', '--quiet',
             action='store_true', dest='quiet', help='activates quiet mode', default=False)
     verbosity.add_option(
@@ -439,12 +449,10 @@ def parseOpts(overrideArguments=None):
             help='Display sent and read HTTP traffic')
 
 
-    filesystem.add_option('-t', '--title',
-            action='store_true', dest='usetitle', help='use title in file name (default)', default=False)
+    filesystem.add_option('-a', '--batch-file',
+            dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)')
     filesystem.add_option('--id',
             action='store_true', dest='useid', help='use only video ID in file name', default=False)
-    filesystem.add_option('-l', '--literal',
-            action='store_true', dest='usetitle', help='[deprecated] alias of --title', default=False)
     filesystem.add_option('-A', '--auto-number',
             action='store_true', dest='autonumber',
             help='number downloaded files starting from 00000', default=False)
@@ -470,11 +478,10 @@ def parseOpts(overrideArguments=None):
     filesystem.add_option('--restrict-filenames',
             action='store_true', dest='restrictfilenames',
             help='Restrict filenames to only ASCII characters, and avoid "&" and spaces in filenames', default=False)
-    filesystem.add_option('-a', '--batch-file',
-            dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)')
-    filesystem.add_option('--load-info',
-            dest='load_info_filename', metavar='FILE',
-            help='json file containing the video information (created with the "--write-json" option)')
+    filesystem.add_option('-t', '--title',
+            action='store_true', dest='usetitle', help='[deprecated] use title in file name (default)', default=False)
+    filesystem.add_option('-l', '--literal',
+            action='store_true', dest='usetitle', help='[deprecated] alias of --title', default=False)
     filesystem.add_option('-w', '--no-overwrites',
             action='store_true', dest='nooverwrites', help='do not overwrite files', default=False)
     filesystem.add_option('-c', '--continue',
@@ -482,8 +489,6 @@ def parseOpts(overrideArguments=None):
     filesystem.add_option('--no-continue',
             action='store_false', dest='continue_dl',
             help='do not resume partially downloaded files (restart from beginning)')
-    filesystem.add_option('--cookies',
-            dest='cookiefile', metavar='FILE', help='file to read cookies from and dump cookie jar in')
     filesystem.add_option('--no-part',
             action='store_true', dest='nopart', help='do not use .part files', default=False)
     filesystem.add_option('--no-mtime',
@@ -501,6 +506,20 @@ def parseOpts(overrideArguments=None):
     filesystem.add_option('--write-thumbnail',
             action='store_true', dest='writethumbnail',
             help='write thumbnail image to disk', default=False)
+    filesystem.add_option('--load-info',
+            dest='load_info_filename', metavar='FILE',
+            help='json file containing the video information (created with the "--write-json" option)')
+    filesystem.add_option('--cookies',
+            dest='cookiefile', metavar='FILE', help='file to read cookies from and dump cookie jar in')
+    filesystem.add_option(
+        '--cache-dir', dest='cachedir', default=get_cachedir(), metavar='DIR',
+        help='Location in the filesystem where youtube-dl can store some downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl or ~/.cache/youtube-dl . At the moment, only YouTube player files (for videos with obfuscated signatures) are cached, but that may change.')
+    filesystem.add_option(
+        '--no-cache-dir', action='store_const', const=None, dest='cachedir',
+        help='Disable filesystem caching')
+    filesystem.add_option(
+        '--rm-cache-dir', action='store_true', dest='rm_cachedir',
+        help='Delete all filesystem cache files')
 
 
     postproc.add_option('-x', '--extract-audio', action='store_true', dest='extractaudio', default=False,
@@ -534,6 +553,7 @@ def parseOpts(overrideArguments=None):
     parser.add_option_group(downloader)
     parser.add_option_group(filesystem)
     parser.add_option_group(verbosity)
+    parser.add_option_group(workarounds)
     parser.add_option_group(video_format)
     parser.add_option_group(subtitles)
     parser.add_option_group(authentication)
@@ -694,7 +714,7 @@ def _real_main(argv=None):
         date = DateRange.day(opts.date)
     else:
         date = DateRange(opts.dateafter, opts.datebefore)
-    if opts.default_search not in ('auto', 'auto_warning', None) and ':' not in opts.default_search:
+    if opts.default_search not in ('auto', 'auto_warning', 'error', 'fixup_error', None) and ':' not in opts.default_search:
         parser.error(u'--default-search invalid; did you forget a colon (:) at the end?')
 
     # Do not download videos when there are audio-only formats
@@ -833,9 +853,26 @@ def _real_main(argv=None):
         if opts.update_self:
             update_self(ydl.to_screen, opts.verbose)
 
+        # Remove cache dir
+        if opts.rm_cachedir:
+            if opts.cachedir is None:
+                ydl.to_screen(u'No cache dir specified (Did you combine --no-cache-dir and --rm-cache-dir?)')
+            else:
+                if ('.cache' not in opts.cachedir) or ('youtube-dl' not in opts.cachedir):
+                    ydl.to_screen(u'Not removing directory %s - this does not look like a cache dir' % opts.cachedir)
+                    retcode = 141
+                else:
+                    ydl.to_screen(
+                        u'Removing cache dir %s .' % opts.cachedir,
+                        skip_eol=True)
+                    if os.path.exists(opts.cachedir):
+                        ydl.to_screen(u'.', skip_eol=True)
+                        shutil.rmtree(opts.cachedir)
+                    ydl.to_screen(u'.')
+
         # Maybe do nothing
         if (len(all_urls) < 1) and (opts.load_info_filename is None):
-            if not opts.update_self:
+            if not (opts.update_self or opts.rm_cachedir):
                 parser.error(u'you must provide at least one URL')
             else:
                 sys.exit()
index 917f3450e63c62b95551081109c5d3f55f49aeba..9ce97f5fe6c4517ec8b6d681e924bcd387a185cf 100644 (file)
@@ -292,7 +292,7 @@ class FileDownloader(object):
 
     def real_download(self, filename, info_dict):
         """Real download process. Redefine in subclasses."""
-        raise NotImplementedError(u'This method must be implemented by sublcasses')
+        raise NotImplementedError(u'This method must be implemented by subclasses')
 
     def _hook_progress(self, status):
         for ph in self._progress_hooks:
index e6be6ae6c878c9ede7cd2cf3b6be663e22bb8be1..71353f607daead364acbdad83b18b79e61a5bffa 100644 (file)
@@ -220,6 +220,7 @@ class F4mFD(FileDownloader):
 
     def real_download(self, filename, info_dict):
         man_url = info_dict['url']
+        requested_bitrate = info_dict.get('tbr')
         self.to_screen('[download] Downloading f4m manifest')
         manifest = self.ydl.urlopen(man_url).read()
         self.report_destination(filename)
@@ -233,8 +234,14 @@ class F4mFD(FileDownloader):
 
         doc = etree.fromstring(manifest)
         formats = [(int(f.attrib.get('bitrate', -1)), f) for f in doc.findall(_add_ns('media'))]
-        formats = sorted(formats, key=lambda f: f[0])
-        rate, media = formats[-1]
+        if requested_bitrate is None:
+            # get the best format
+            formats = sorted(formats, key=lambda f: f[0])
+            rate, media = formats[-1]
+        else:
+            rate, media = list(filter(
+                lambda f: int(f[0]) == requested_bitrate, formats))[0]
+
         base_url = compat_urlparse.urljoin(man_url, media.attrib['url'])
         bootstrap = base64.b64decode(doc.find(_add_ns('bootstrapInfo')).text)
         metadata = base64.b64decode(media.find(_add_ns('metadata')).text)
index 8d63d9281d68d496b65d025666fd4d8a2c863b06..9be1d2e0ff5ea24d7b57a2e1dda8224d57b497df 100644 (file)
@@ -1,3 +1,4 @@
+from .abc import ABCIE
 from .academicearth import AcademicEarthCourseIE
 from .addanime import AddAnimeIE
 from .adultswim import AdultSwimIE
@@ -68,6 +69,7 @@ from .dfb import DFBIE
 from .dotsub import DotsubIE
 from .dreisat import DreiSatIE
 from .drtv import DRTVIE
+from .dump import DumpIE
 from .defense import DefenseGouvFrIE
 from .discovery import DiscoveryIE
 from .divxstage import DivxStageIE
@@ -76,6 +78,10 @@ from .ebaumsworld import EbaumsWorldIE
 from .ehow import EHowIE
 from .eighttracks import EightTracksIE
 from .eitb import EitbIE
+from .ellentv import (
+    EllenTVIE,
+    EllenTVClipsIE,
+)
 from .elpais import ElPaisIE
 from .empflix import EmpflixIE
 from .engadget import EngadgetIE
@@ -111,9 +117,11 @@ from .funnyordie import FunnyOrDieIE
 from .gamekings import GamekingsIE
 from .gameone import GameOneIE
 from .gamespot import GameSpotIE
+from .gamestar import GameStarIE
 from .gametrailers import GametrailersIE
 from .gdcvault import GDCVaultIE
 from .generic import GenericIE
+from .godtube import GodTubeIE
 from .googleplus import GooglePlusIE
 from .googlesearch import GoogleSearchIE
 from .gorillavid import GorillaVidIE
@@ -123,6 +131,7 @@ from .helsinki import HelsinkiIE
 from .hentaistigma import HentaiStigmaIE
 from .hotnewhiphop import HotNewHipHopIE
 from .howcast import HowcastIE
+from .howstuffworks import HowStuffWorksIE
 from .huffpost import HuffPostIE
 from .hypem import HypemIE
 from .iconosquare import IconosquareIE
@@ -140,8 +149,10 @@ from .ivi import (
     IviIE,
     IviCompilationIE
 )
+from .izlesene import IzleseneIE
 from .jadorecettepub import JadoreCettePubIE
 from .jeuxvideo import JeuxVideoIE
+from .jove import JoveIE
 from .jukebox import JukeboxIE
 from .justintv import JustinTVIE
 from .jpopsukitv import JpopsukiIE
@@ -151,6 +162,7 @@ from .khanacademy import KhanAcademyIE
 from .kickstarter import KickStarterIE
 from .keek import KeekIE
 from .kontrtube import KontrTubeIE
+from .krasview import KrasViewIE
 from .ku6 import Ku6IE
 from .la7 import LA7IE
 from .lifenews import LifeNewsIE
@@ -172,10 +184,12 @@ from .mdr import MDRIE
 from .metacafe import MetacafeIE
 from .metacritic import MetacriticIE
 from .mit import TechTVMITIE, MITIE, OCWMITIE
+from .mitele import MiTeleIE
 from .mixcloud import MixcloudIE
 from .mlb import MLBIE
 from .mpora import MporaIE
 from .mofosex import MofosexIE
+from .mojvideo import MojvideoIE
 from .mooshare import MooshareIE
 from .morningstar import MorningstarIE
 from .motherless import MotherlessIE
@@ -219,10 +233,14 @@ from .nrk import (
 from .ntv import NTVIE
 from .nytimes import NYTimesIE
 from .nuvid import NuvidIE
-from .oe1 import OE1IE
 from .ooyala import OoyalaIE
-from .orf import ORFIE
+from .orf import (
+    ORFTVthekIE,
+    ORFOE1IE,
+    ORFFM4IE,
+)
 from .parliamentliveuk import ParliamentLiveUKIE
+from .patreon import PatreonIE
 from .pbs import PBSIE
 from .photobucket import PhotobucketIE
 from .playvid import PlayvidIE
@@ -242,6 +260,7 @@ from .ro220 import Ro220IE
 from .rottentomatoes import RottenTomatoesIE
 from .roxwel import RoxwelIE
 from .rtbf import RTBFIE
+from .rtlnl import RtlXlIE
 from .rtlnow import RTLnowIE
 from .rts import RTSIE
 from .rtve import RTVEALaCartaIE
@@ -258,6 +277,7 @@ from .savefrom import SaveFromIE
 from .scivee import SciVeeIE
 from .screencast import ScreencastIE
 from .servingsys import ServingSysIE
+from .shared import SharedIE
 from .sina import SinaIE
 from .slideshare import SlideshareIE
 from .slutload import SlutloadIE
@@ -320,6 +340,8 @@ from .tumblr import TumblrIE
 from .tutv import TutvIE
 from .tvigle import TvigleIE
 from .tvp import TvpIE
+from .tvplay import TVPlayIE
+from .ubu import UbuIE
 from .udemy import (
     UdemyIE,
     UdemyCourseIE
@@ -341,6 +363,7 @@ from .videofyme import VideofyMeIE
 from .videopremium import VideoPremiumIE
 from .videott import VideoTtIE
 from .videoweed import VideoWeedIE
+from .vidme import VidmeIE
 from .vimeo import (
     VimeoIE,
     VimeoChannelIE,
@@ -374,6 +397,7 @@ from .wistia import WistiaIE
 from .worldstarhiphop import WorldStarHipHopIE
 from .wrzuta import WrzutaIE
 from .xbef import XBefIE
+from .xboxclips import XboxClipsIE
 from .xhamster import XHamsterIE
 from .xnxx import XNXXIE
 from .xvideos import XVideosIE
diff --git a/youtube_dl/extractor/abc.py b/youtube_dl/extractor/abc.py
new file mode 100644 (file)
index 0000000..7d89f44
--- /dev/null
@@ -0,0 +1,48 @@
+from __future__ import unicode_literals
+
+import re
+import json
+
+from .common import InfoExtractor
+
+
+class ABCIE(InfoExtractor):
+    IE_NAME = 'abc.net.au'
+    _VALID_URL = r'http://www\.abc\.net\.au/news/[^/]+/[^/]+/(?P<id>\d+)'
+
+    _TEST = {
+        'url': 'http://www.abc.net.au/news/2014-07-25/bringing-asylum-seekers-to-australia-would-give/5624716',
+        'md5': 'dad6f8ad011a70d9ddf887ce6d5d0742',
+        'info_dict': {
+            'id': '5624716',
+            'ext': 'mp4',
+            'title': 'Bringing asylum seekers to Australia would give them right to asylum claims: professor',
+            'description': 'md5:ba36fa5e27e5c9251fd929d339aea4af',
+        },
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        webpage = self._download_webpage(url, video_id)
+
+        urls_info_json = self._search_regex(
+            r'inlineVideoData\.push\((.*?)\);', webpage, 'video urls',
+            flags=re.DOTALL)
+        urls_info = json.loads(urls_info_json.replace('\'', '"'))
+        formats = [{
+            'url': url_info['url'],
+            'width': int(url_info['width']),
+            'height': int(url_info['height']),
+            'tbr': int(url_info['bitrate']),
+            'filesize': int(url_info['filesize']),
+        } for url_info in urls_info]
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': self._og_search_title(webpage),
+            'formats': formats,
+            'description': self._og_search_description(webpage),
+            'thumbnail': self._og_search_thumbnail(webpage),
+        }
index 7e93bc4df286e71554f272fb69ba43348f3c5da4..74860882628017c5ab7a44f22bd9b05286ad556e 100644 (file)
@@ -1,5 +1,7 @@
 #coding: utf-8
 
+from __future__ import unicode_literals
+
 import re
 
 from .common import InfoExtractor
@@ -13,13 +15,14 @@ class AparatIE(InfoExtractor):
     _VALID_URL = r'^https?://(?:www\.)?aparat\.com/(?:v/|video/video/embed/videohash/)(?P<id>[a-zA-Z0-9]+)'
 
     _TEST = {
-        u'url': u'http://www.aparat.com/v/wP8On',
-        u'file': u'wP8On.mp4',
-        u'md5': u'6714e0af7e0d875c5a39c4dc4ab46ad1',
-        u'info_dict': {
-            u"title": u"تیم گلکسی 11 - زومیت",
+        'url': 'http://www.aparat.com/v/wP8On',
+        'md5': '6714e0af7e0d875c5a39c4dc4ab46ad1',
+        'info_dict': {
+            'id': 'wP8On',
+            'ext': 'mp4',
+            'title': 'تیم گلکسی 11 - زومیت',
         },
-        #u'skip': u'Extremely unreliable',
+        # 'skip': 'Extremely unreliable',
     }
 
     def _real_extract(self, url):
@@ -29,8 +32,8 @@ class AparatIE(InfoExtractor):
         # Note: There is an easier-to-parse configuration at
         # http://www.aparat.com/video/video/config/videohash/%video_id
         # but the URL in there does not work
-        embed_url = (u'http://www.aparat.com/video/video/embed/videohash/' +
-                     video_id + u'/vt/frame')
+        embed_url = ('http://www.aparat.com/video/video/embed/videohash/' +
+                     video_id + '/vt/frame')
         webpage = self._download_webpage(embed_url, video_id)
 
         video_urls = re.findall(r'fileList\[[0-9]+\]\s*=\s*"([^"]+)"', webpage)
index dc8657b67c9850c1676af737f319cb4c06bad6d6..4359b88d1b7057944beb126eb8a1c82dbb818758 100644 (file)
@@ -6,6 +6,7 @@ import json
 from .common import InfoExtractor
 from ..utils import (
     compat_urlparse,
+    int_or_none,
 )
 
 
@@ -110,8 +111,8 @@ class AppleTrailersIE(InfoExtractor):
                 formats.append({
                     'url': format_url,
                     'format': format['type'],
-                    'width': format['width'],
-                    'height': int(format['height']),
+                    'width': int_or_none(format['width']),
+                    'height': int_or_none(format['height']),
                 })
 
             self._sort_formats(formats)
index 30a85c8c1c8d1b3a10a40ac55a577e3402cb487a..7f0da8ab6d5b9f0e62f2af18c74c26185c505259 100644 (file)
@@ -8,6 +8,8 @@ from ..utils import (
     determine_ext,
     ExtractorError,
     qualities,
+    compat_urllib_parse_urlparse,
+    compat_urllib_parse,
 )
 
 
@@ -44,8 +46,14 @@ class ARDIE(InfoExtractor):
         else:
             video_id = m.group('video_id')
 
+        urlp = compat_urllib_parse_urlparse(url)
+        url = urlp._replace(path=compat_urllib_parse.quote(urlp.path.encode('utf-8'))).geturl()
+
         webpage = self._download_webpage(url, video_id)
 
+        if '>Der gewünschte Beitrag ist nicht mehr verfügbar.<' in webpage:
+            raise ExtractorError('Video %s is no longer available' % video_id, expected=True)
+
         title = self._html_search_regex(
             [r'<h1(?:\s+class="boxTopHeadline")?>(.*?)</h1>',
              r'<meta name="dcterms.title" content="(.*?)"/>',
index 9591bad8a66254e90247a204b43b10b6db4f6406..d86dbba8e8db9748ecf61b59a36bfffa532a33d1 100644 (file)
@@ -109,15 +109,19 @@ class ArteTVPlus7IE(InfoExtractor):
             regexes = [r'VO?%s' % l, r'VO?.-ST%s' % l]
             return any(re.match(r, f['versionCode']) for r in regexes)
         # Some formats may not be in the same language as the url
+        # TODO: Might want not to drop videos that does not match requested language
+        # but to process those formats with lower precedence
         formats = filter(_match_lang, all_formats)
-        formats = list(formats) # in python3 filter returns an iterator
+        formats = list(formats)  # in python3 filter returns an iterator
         if not formats:
             # Some videos are only available in the 'Originalversion'
             # they aren't tagged as being in French or German
-            if all(f['versionCode'] == 'VO' or f['versionCode'] == 'VA' for f in all_formats):
-                formats = all_formats
-            else:
-                raise ExtractorError(u'The formats list is empty')
+            # Sometimes there are neither videos of requested lang code
+            # nor original version videos available
+            # For such cases we just take all_formats as is
+            formats = all_formats
+            if not formats:
+                raise ExtractorError('The formats list is empty')
 
         if re.match(r'[A-Z]Q', formats[0]['quality']) is not None:
             def sort_key(f):
index 7d558e262ecea44df6b025f0db716b82d975b314..3e461e715e141b1ff4a294eb01b7657d16f05d4b 100644 (file)
@@ -52,7 +52,7 @@ class BlinkxIE(InfoExtractor):
                     'height': int(m['h']),
                 })
             elif m['type'] == 'original':
-                duration = m['d']
+                duration = float(m['d'])
             elif m['type'] == 'youtube':
                 yt_id = m['link']
                 self.to_screen('Youtube video detected: %s' % yt_id)
index 25fb79e146b18f50962ba506d01560fbd845dbf2..c51a97ce4327cff934216927948587131dedfa80 100644 (file)
@@ -10,7 +10,7 @@ class BloombergIE(InfoExtractor):
 
     _TEST = {
         'url': 'http://www.bloomberg.com/video/shah-s-presentation-on-foreign-exchange-strategies-qurhIVlJSB6hzkVi229d8g.html',
-        'md5': '7bf08858ff7c203c870e8a6190e221e5',
+        # The md5 checksum changes
         'info_dict': {
             'id': 'qurhIVlJSB6hzkVi229d8g',
             'ext': 'flv',
@@ -31,8 +31,7 @@ class BloombergIE(InfoExtractor):
         return {
             'id': name.split('-')[-1],
             'title': title,
-            'url': f4m_url,
-            'ext': 'flv',
+            'formats': self._extract_f4m_formats(f4m_url, name),
             'description': self._og_search_description(webpage),
             'thumbnail': self._og_search_thumbnail(webpage),
         }
index f7f2f713a59446a68f806b7e62140bfcbc808313..86f0c2861e35f296f594a4ac45bbfe74b799d9e0 100644 (file)
@@ -7,6 +7,7 @@ from .common import InfoExtractor
 from ..utils import (
     ExtractorError,
     int_or_none,
+    parse_duration,
 )
 
 
@@ -22,8 +23,9 @@ class BRIE(InfoExtractor):
             'info_dict': {
                 'id': '25e279aa-1ffd-40fd-9955-5325bd48a53a',
                 'ext': 'mp4',
-                'title': 'Am 1. und 2. August in Oberammergau',
-                'description': 'md5:dfd224e5aa6819bc1fcbb7826a932021',
+                'title': 'Wenn das Traditions-Theater wackelt',
+                'description': 'Heimatsound-Festival 2014: Wenn das Traditions-Theater wackelt',
+                'duration': 34,
             }
         },
         {
@@ -34,6 +36,7 @@ class BRIE(InfoExtractor):
                 'ext': 'mp4',
                 'title': 'Über den Pass',
                 'description': 'Die Eroberung der Alpen: Über den Pass',
+                'duration': 2588,
             }
         },
         {
@@ -44,6 +47,7 @@ class BRIE(InfoExtractor):
                 'ext': 'aac',
                 'title': '"Keine neuen Schulden im nächsten Jahr"',
                 'description': 'Haushaltsentwurf: "Keine neuen Schulden im nächsten Jahr"',
+                'duration': 64,
             }
         },
         {
@@ -54,6 +58,7 @@ class BRIE(InfoExtractor):
                 'ext': 'mp4',
                 'title': 'Umweltbewusster Häuslebauer',
                 'description': 'Uwe Erdelt: Umweltbewusster Häuslebauer',
+                'duration': 116,
             }
         },
         {
@@ -64,6 +69,7 @@ class BRIE(InfoExtractor):
                 'ext': 'mp4',
                 'title': 'Folge 1 - Metaphysik',
                 'description': 'Kant für Anfänger: Folge 1 - Metaphysik',
+                'duration': 893,
                 'uploader': 'Eva Maria Steimle',
                 'upload_date': '20140117',
             }
@@ -84,6 +90,7 @@ class BRIE(InfoExtractor):
             media = {
                 'id': xml_media.get('externalId'),
                 'title': xml_media.find('title').text,
+                'duration': parse_duration(xml_media.find('duration').text),
                 'formats': self._extract_formats(xml_media.find('assets')),
                 'thumbnails': self._extract_thumbnails(xml_media.find('teaserImage/variants')),
                 'description': ' '.join(xml_media.find('shareTitle').text.splitlines()),
index 88f12797c6c351ab8eae943d183c81bc90028e8e..2e6eeac08e403fcf46efe674ed892cec4a7b9a84 100644 (file)
@@ -18,6 +18,7 @@ from ..utils import (
     clean_html,
     compiled_regex_type,
     ExtractorError,
+    int_or_none,
     RegexNotFoundError,
     sanitize_filename,
     unescapeHTML,
@@ -373,7 +374,8 @@ class InfoExtractor(object):
         else:
             for p in pattern:
                 mobj = re.search(p, string, flags)
-                if mobj: break
+                if mobj:
+                    break
 
         if os.name != 'nt' and sys.stderr.isatty():
             _name = u'\033[0;34m%s\033[0m' % name
@@ -461,8 +463,9 @@ class InfoExtractor(object):
         return self._og_search_property('title', html, **kargs)
 
     def _og_search_video_url(self, html, name='video url', secure=True, **kargs):
-        regexes = self._og_regexes('video')
-        if secure: regexes = self._og_regexes('video:secure_url') + regexes
+        regexes = self._og_regexes('video') + self._og_regexes('video:url')
+        if secure:
+            regexes = self._og_regexes('video:secure_url') + regexes
         return self._html_search_regex(regexes, html, name, **kargs)
 
     def _og_search_url(self, html, **kargs):
@@ -589,6 +592,24 @@ class InfoExtractor(object):
         self.to_screen(msg)
         time.sleep(timeout)
 
+    def _extract_f4m_formats(self, manifest_url, video_id):
+        manifest = self._download_xml(
+            manifest_url, video_id, 'Downloading f4m manifest',
+            'Unable to download f4m manifest')
+
+        formats = []
+        for media_el in manifest.findall('{http://ns.adobe.com/f4m/1.0}media'):
+            formats.append({
+                'url': manifest_url,
+                'ext': 'flv',
+                'tbr': int_or_none(media_el.attrib.get('bitrate')),
+                'width': int_or_none(media_el.attrib.get('width')),
+                'height': int_or_none(media_el.attrib.get('height')),
+            })
+        self._sort_formats(formats)
+
+        return formats
+
 
 class SearchInfoExtractor(InfoExtractor):
     """
index cb8e0682240bfed9a56a58490f18989f33fef71d..8049779b0a31049f704bae256a3752a9a22ad789 100644 (file)
@@ -30,7 +30,7 @@ class DFBIE(InfoExtractor):
             video_id)
         video_info = player_info.find('video')
 
-        f4m_info = self._download_xml(video_info.find('url').text, video_id)
+        f4m_info = self._download_xml(self._proto_relative_url(video_info.find('url').text.strip()), video_id)
         token_el = f4m_info.find('token')
         manifest_url = token_el.attrib['url'] + '?' + 'hdnea=' + token_el.attrib['auth'] + '&hdcore=3.2.0'
 
diff --git a/youtube_dl/extractor/dump.py b/youtube_dl/extractor/dump.py
new file mode 100644 (file)
index 0000000..6b65177
--- /dev/null
@@ -0,0 +1,39 @@
+# encoding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class DumpIE(InfoExtractor):
+    _VALID_URL = r'^https?://(?:www\.)?dump\.com/(?P<id>[a-zA-Z0-9]+)/'
+
+    _TEST = {
+        'url': 'http://www.dump.com/oneus/',
+        'md5': 'ad71704d1e67dfd9e81e3e8b42d69d99',
+        'info_dict': {
+            'id': 'oneus',
+            'ext': 'flv',
+            'title': "He's one of us.",
+            'thumbnail': 're:^https?://.*\.jpg$',
+        },
+    }
+
+    def _real_extract(self, url):
+        m = re.match(self._VALID_URL, url)
+        video_id = m.group('id')
+
+        webpage = self._download_webpage(url, video_id)
+        video_url = self._search_regex(
+            r's1.addVariable\("file",\s*"([^"]+)"', webpage, 'video URL')
+
+        thumb = self._og_search_thumbnail(webpage)
+        title = self._search_regex(r'<b>([^"]+)</b>', webpage, 'title')
+
+        return {
+            'id': video_id,
+            'title': title,
+            'url': video_url,
+            'thumbnail': thumb,
+        }
diff --git a/youtube_dl/extractor/ellentv.py b/youtube_dl/extractor/ellentv.py
new file mode 100644 (file)
index 0000000..3e79236
--- /dev/null
@@ -0,0 +1,79 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    parse_iso8601,
+)
+
+
+class EllenTVIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?ellentv\.com/videos/(?P<id>[a-z0-9_-]+)'
+    _TEST = {
+        'url': 'http://www.ellentv.com/videos/0-7jqrsr18/',
+        'md5': 'e4af06f3bf0d5f471921a18db5764642',
+        'info_dict': {
+            'id': '0-7jqrsr18',
+            'ext': 'mp4',
+            'title': 'What\'s Wrong with These Photos? A Whole Lot',
+            'timestamp': 1406876400,
+            'upload_date': '20140801',
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        webpage = self._download_webpage(url, video_id)
+        timestamp = parse_iso8601(self._search_regex(
+            r'<span class="publish-date"><time datetime="([^"]+)">',
+            webpage, 'timestamp'))
+
+        return {
+            'id': video_id,
+            'title': self._og_search_title(webpage),
+            'url': self._html_search_meta('VideoURL', webpage, 'url'),
+            'timestamp': timestamp,
+        }
+
+
+class EllenTVClipsIE(InfoExtractor):
+    IE_NAME = 'EllenTV:clips'
+    _VALID_URL = r'https?://(?:www\.)?ellentv\.com/episodes/(?P<id>[a-z0-9_-]+)'
+    _TEST = {
+        'url': 'http://www.ellentv.com/episodes/meryl-streep-vanessa-hudgens/',
+        'info_dict': {
+            'id': 'meryl-streep-vanessa-hudgens',
+            'title': 'Meryl Streep, Vanessa Hudgens',
+        },
+        'playlist_mincount': 9,
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        playlist_id = mobj.group('id')
+
+        webpage = self._download_webpage(url, playlist_id)
+        playlist = self._extract_playlist(webpage)
+
+        return {
+            '_type': 'playlist',
+            'id': playlist_id,
+            'title': self._og_search_title(webpage),
+            'entries': self._extract_entries(playlist)
+        }
+
+    def _extract_playlist(self, webpage):
+        json_string = self._search_regex(r'playerView.addClips\(\[\{(.*?)\}\]\);', webpage, 'json')
+        try:
+            return json.loads("[{" + json_string + "}]")
+        except ValueError as ve:
+            raise ExtractorError('Failed to parse JSON', cause=ve)
+
+    def _extract_entries(self, playlist):
+        return [self.url_result(item['url'], 'EllenTV') for item in playlist]
index 272dfe1f643208a31635dade0e561c8eb009aab7..476fc22b93424b13255d5eec3578eb985dbfbdfd 100644 (file)
@@ -36,7 +36,7 @@ class EscapistIE(InfoExtractor):
             r'<meta name="description" content="([^"]*)"',
             webpage, 'description', fatal=False)
 
-        playerUrl = self._og_search_video_url(webpage, name=u'player URL')
+        playerUrl = self._og_search_video_url(webpage, name='player URL')
 
         title = self._html_search_regex(
             r'<meta name="title" content="([^"]*)"',
index f0cd8f1565b7e7b1b5220ba0f68d0b9225d953e6..f7cf700b5df8ecbcd714fd23db3dc7ff477adb73 100644 (file)
@@ -20,7 +20,7 @@ from ..utils import (
 class FacebookIE(InfoExtractor):
     _VALID_URL = r'''(?x)
         https?://(?:\w+\.)?facebook\.com/
-        (?:[^#?]*\#!/)?
+        (?:[^#]*?\#!/)?
         (?:video/video\.php|photo\.php|video/embed)\?(?:.*?)
         (?:v|video_id)=(?P<id>[0-9]+)
         (?:.*)'''
index 6d73c8a4a32f83975025a0b1ed932fc291176f8a..af439ccfeefeade46f75b693627b09ba6ed830d6 100644 (file)
@@ -42,7 +42,6 @@ class FiredriveIE(InfoExtractor):
         fields = dict(re.findall(r'''(?x)<input\s+
             type="hidden"\s+
             name="([^"]+)"\s+
-            (?:id="[^"]+"\s+)?
             value="([^"]*)"
             ''', webpage))
 
@@ -66,7 +65,7 @@ class FiredriveIE(InfoExtractor):
         ext = self._search_regex(r'type:\s?\'([^\']+)\',',
                                  webpage, 'extension', fatal=False)
         video_url = self._search_regex(
-            r'file:\s?\'(http[^\']+)\',', webpage, 'file url')
+            r'file:\s?loadURL\(\'(http[^\']+)\'\),', webpage, 'file url')
 
         formats = [{
             'format_id': 'sd',
index 1fbe6d1759b8900160b7bc94b0a2396406acc016..1b0e8e5d59dc23d52d7fb15d7e46e0b1383a7435 100644 (file)
@@ -19,17 +19,35 @@ class FranceTVBaseInfoExtractor(InfoExtractor):
             + video_id, video_id, 'Downloading XML config')
 
         manifest_url = info.find('videos/video/url').text
-        video_url = manifest_url.replace('manifest.f4m', 'index_2_av.m3u8')
-        video_url = video_url.replace('/z/', '/i/')
+        manifest_url = manifest_url.replace('/z/', '/i/')
+
+        if manifest_url.startswith('rtmp'):
+            formats = [{'url': manifest_url, 'ext': 'flv'}]
+        else:
+            formats = []
+            available_formats = self._search_regex(r'/[^,]*,(.*?),k\.mp4', manifest_url, 'available formats')
+            for index, format_descr in enumerate(available_formats.split(',')):
+                format_info = {
+                    'url': manifest_url.replace('manifest.f4m', 'index_%d_av.m3u8' % index),
+                    'ext': 'mp4',
+                }
+                m_resolution = re.search(r'(?P<width>\d+)x(?P<height>\d+)', format_descr)
+                if m_resolution is not None:
+                    format_info.update({
+                        'width': int(m_resolution.group('width')),
+                        'height': int(m_resolution.group('height')),
+                    })
+                formats.append(format_info)
+
         thumbnail_path = info.find('image').text
 
-        return {'id': video_id,
-                'ext': 'flv' if video_url.startswith('rtmp') else 'mp4',
-                'url': video_url,
-                'title': info.find('titre').text,
-                'thumbnail': compat_urlparse.urljoin('http://pluzz.francetv.fr', thumbnail_path),
-                'description': info.find('synopsis').text,
-                }
+        return {
+            'id': video_id,
+            'title': info.find('titre').text,
+            'formats': formats,
+            'thumbnail': compat_urlparse.urljoin('http://pluzz.francetv.fr', thumbnail_path),
+            'description': info.find('synopsis').text,
+        }
 
 
 class PluzzIE(FranceTVBaseInfoExtractor):
diff --git a/youtube_dl/extractor/gamestar.py b/youtube_dl/extractor/gamestar.py
new file mode 100644 (file)
index 0000000..50f8fc7
--- /dev/null
@@ -0,0 +1,74 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    int_or_none,
+    parse_duration,
+    str_to_int,
+    unified_strdate,
+)
+
+
+class GameStarIE(InfoExtractor):
+    _VALID_URL = r'http://www\.gamestar\.de/videos/.*,(?P<id>[0-9]+)\.html'
+    _TEST = {
+        'url': 'http://www.gamestar.de/videos/trailer,3/hobbit-3-die-schlacht-der-fuenf-heere,76110.html',
+        'md5': '96974ecbb7fd8d0d20fca5a00810cea7',
+        'info_dict': {
+            'id': '76110',
+            'ext': 'mp4',
+            'title': 'Hobbit 3: Die Schlacht der Fünf Heere - Teaser-Trailer zum dritten Teil',
+            'description': 'Der Teaser-Trailer zu Hobbit 3: Die Schlacht der Fünf Heere zeigt einige Szenen aus dem dritten Teil der Saga und kündigt den vollständigen Trailer an.',
+            'thumbnail': 'http://images.gamestar.de/images/idgwpgsgp/bdb/2494525/600x.jpg',
+            'upload_date': '20140728',
+            'duration': 17
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        webpage = self._download_webpage(url, video_id)
+
+        og_title = self._og_search_title(webpage)
+        title = og_title.replace(' - Video bei GameStar.de', '').strip()
+
+        url = 'http://gamestar.de/_misc/videos/portal/getVideoUrl.cfm?premium=0&videoId=' + video_id
+
+        description = self._og_search_description(webpage).strip()
+
+        thumbnail = self._proto_relative_url(
+            self._og_search_thumbnail(webpage), scheme='http:')
+
+        upload_date = unified_strdate(self._html_search_regex(
+            r'<span style="float:left;font-size:11px;">Datum: ([0-9]+\.[0-9]+\.[0-9]+)&nbsp;&nbsp;',
+            webpage, 'upload_date', fatal=False))
+
+        duration = parse_duration(self._html_search_regex(
+            r'&nbsp;&nbsp;Länge: ([0-9]+:[0-9]+)</span>', webpage, 'duration',
+            fatal=False))
+
+        view_count = str_to_int(self._html_search_regex(
+            r'&nbsp;&nbsp;Zuschauer: ([0-9\.]+)&nbsp;&nbsp;', webpage,
+            'view_count', fatal=False))
+
+        comment_count = int_or_none(self._html_search_regex(
+            r'>Kommentieren \(([0-9]+)\)</a>', webpage, 'comment_count',
+            fatal=False))
+
+        return {
+            'id': video_id,
+            'title': title,
+            'url': url,
+            'ext': 'mp4',
+            'thumbnail': thumbnail,
+            'description': description,
+            'upload_date': upload_date,
+            'duration': duration,
+            'view_count': view_count,
+            'comment_count': comment_count
+        }
index 89d5994eef02623222b73fa04e0287a264b01333..de14ae1fb1edd0600488b8f04c7b400bf310ef5a 100644 (file)
@@ -8,6 +8,7 @@ from ..utils import (
     compat_urllib_request,
 )
 
+
 class GDCVaultIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?gdcvault\.com/play/(?P<id>\d+)/(?P<name>(\w|-)+)'
     _TESTS = [
@@ -31,6 +32,15 @@ class GDCVaultIE(InfoExtractor):
                 'skip_download': True,  # Requires rtmpdump
             }
         },
+        {
+            'url': 'http://www.gdcvault.com/play/1015301/Thexder-Meets-Windows-95-or',
+            'md5': 'a5eb77996ef82118afbbe8e48731b98e',
+            'info_dict': {
+                'id': '1015301',
+                'ext': 'flv',
+                'title': 'Thexder Meets Windows 95, or Writing Great Games in the Windows 95 Environment',
+            }
+        }
     ]
 
     def _parse_mp4(self, xml_description):
@@ -103,18 +113,40 @@ class GDCVaultIE(InfoExtractor):
         webpage_url = 'http://www.gdcvault.com/play/' + video_id
         start_page = self._download_webpage(webpage_url, video_id)
 
-        xml_root = self._html_search_regex(r'<iframe src="(?P<xml_root>.*?)player.html.*?".*?</iframe>', start_page, 'xml root', None, False)
+        direct_url = self._search_regex(
+            r's1\.addVariable\("file",\s*encodeURIComponent\("(/[^"]+)"\)\);',
+            start_page, 'url', default=None)
+        if direct_url:
+            video_url = 'http://www.gdcvault.com' + direct_url
+            title = self._html_search_regex(
+                r'<td><strong>Session Name</strong></td>\s*<td>(.*?)</td>',
+                start_page, 'title')
+
+            return {
+                'id': video_id,
+                'url': video_url,
+                'ext': 'flv',
+                'title': title,
+            }
 
+        xml_root = self._html_search_regex(
+            r'<iframe src="(?P<xml_root>.*?)player.html.*?".*?</iframe>',
+            start_page, 'xml root', default=None)
         if xml_root is None:
             # Probably need to authenticate
-            start_page = self._login(webpage_url, video_id)
-            if start_page is None:
+            login_res = self._login(webpage_url, video_id)
+            if login_res is None:
                 self.report_warning('Could not login.')
             else:
+                start_page = login_res
                 # Grab the url from the authenticated page
-                xml_root = self._html_search_regex(r'<iframe src="(?P<xml_root>.*?)player.html.*?".*?</iframe>', start_page, 'xml root')
+                xml_root = self._html_search_regex(
+                    r'<iframe src="(.*?)player.html.*?".*?</iframe>',
+                    start_page, 'xml root')
 
-        xml_name = self._html_search_regex(r'<iframe src=".*?\?xml=(?P<xml_file>.+?\.xml).*?".*?</iframe>', start_page, 'xml filename', None, False)
+        xml_name = self._html_search_regex(
+            r'<iframe src=".*?\?xml=(.+?\.xml).*?".*?</iframe>',
+            start_page, 'xml filename', default=None)
         if xml_name is None:
             # Fallback to the older format
             xml_name = self._html_search_regex(r'<iframe src=".*?\?xmlURL=xml/(?P<xml_file>.+?\.xml).*?".*?</iframe>', start_page, 'xml filename')
index 9db27f9aa32730460af728f690ec131014667185..8e915735eaedcacaff85b4408fb93deb79bac16b 100644 (file)
@@ -383,13 +383,13 @@ class GenericIE(InfoExtractor):
         if not parsed_url.scheme:
             default_search = self._downloader.params.get('default_search')
             if default_search is None:
-                default_search = 'error'
+                default_search = 'fixup_error'
 
-            if default_search in ('auto', 'auto_warning'):
+            if default_search in ('auto', 'auto_warning', 'fixup_error'):
                 if '/' in url:
                     self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
                     return self.url_result('http://' + url)
-                else:
+                elif default_search != 'fixup_error':
                     if default_search == 'auto_warning':
                         if re.match(r'^(?:url|URL)$', url):
                             raise ExtractorError(
@@ -399,7 +399,8 @@ class GenericIE(InfoExtractor):
                             self._downloader.report_warning(
                                 'Falling back to youtube search for  %s . Set --default-search "auto" to suppress this warning.' % url)
                     return self.url_result('ytsearch:' + url)
-            elif default_search == 'error':
+
+            if default_search in ('error', 'fixup_error'):
                 raise ExtractorError(
                     ('%r is not a valid URL. '
                      'Set --default-search "ytsearch" (or run  youtube-dl "ytsearch:%s" ) to search YouTube'
@@ -705,6 +706,13 @@ class GenericIE(InfoExtractor):
             url = unescapeHTML(mobj.group('url'))
             return self.url_result(url, ie='MTVServicesEmbedded')
 
+        # Look for embedded yahoo player
+        mobj = re.search(
+            r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:screen|movies)\.yahoo\.com/.+?\.html\?format=embed)\1',
+            webpage)
+        if mobj is not None:
+            return self.url_result(mobj.group('url'), 'Yahoo')
+
         # Start with something easy: JW Player in SWFObject
         found = re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
         if not found:
diff --git a/youtube_dl/extractor/godtube.py b/youtube_dl/extractor/godtube.py
new file mode 100644 (file)
index 0000000..73bd6d8
--- /dev/null
@@ -0,0 +1,58 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    parse_duration,
+    parse_iso8601,
+)
+
+
+class GodTubeIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?godtube\.com/watch/\?v=(?P<id>[\da-zA-Z]+)'
+    _TESTS = [
+        {
+            'url': 'https://www.godtube.com/watch/?v=0C0CNNNU',
+            'md5': '77108c1e4ab58f48031101a1a2119789',
+            'info_dict': {
+                'id': '0C0CNNNU',
+                'ext': 'mp4',
+                'title': 'Woman at the well.',
+                'duration': 159,
+                'timestamp': 1205712000,
+                'uploader': 'beverlybmusic',
+                'upload_date': '20080317',
+                'thumbnail': 're:^https?://.*\.jpg$',
+            },
+        },
+    ]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        config = self._download_xml(
+            'http://www.godtube.com/resource/mediaplayer/%s.xml' % video_id.lower(),
+            video_id, 'Downloading player config XML')
+
+        video_url = config.find('.//file').text
+        uploader = config.find('.//author').text
+        timestamp = parse_iso8601(config.find('.//date').text)
+        duration = parse_duration(config.find('.//duration').text)
+        thumbnail = config.find('.//image').text
+
+        media = self._download_xml(
+            'http://www.godtube.com/media/xml/?v=%s' % video_id, video_id, 'Downloading media XML')
+
+        title = media.find('.//title').text
+
+        return {
+            'id': video_id,
+            'url': video_url,
+            'title': title,
+            'thumbnail': thumbnail,
+            'timestamp': timestamp,
+            'uploader': uploader,
+            'duration': duration,
+        }
diff --git a/youtube_dl/extractor/howstuffworks.py b/youtube_dl/extractor/howstuffworks.py
new file mode 100644 (file)
index 0000000..68684b9
--- /dev/null
@@ -0,0 +1,134 @@
+from __future__ import unicode_literals
+
+import re
+import json
+import random
+import string
+
+from .common import InfoExtractor
+from ..utils import find_xpath_attr
+
+
+class HowStuffWorksIE(InfoExtractor):
+    _VALID_URL = r'https?://[\da-z-]+\.howstuffworks\.com/(?:[^/]+/)*\d+-(?P<id>.+?)-video\.htm'
+    _TESTS = [
+        {
+            'url': 'http://adventure.howstuffworks.com/5266-cool-jobs-iditarod-musher-video.htm',
+            'info_dict': {
+                'id': '450221',
+                'display_id': 'cool-jobs-iditarod-musher',
+                'ext': 'flv',
+                'title': 'Cool Jobs - Iditarod Musher',
+                'description': 'md5:82bb58438a88027b8186a1fccb365f90',
+                'thumbnail': 're:^https?://.*\.jpg$',
+            },
+            'params': {
+                # md5 is not consistent
+                'skip_download': True
+            }
+        },
+        {
+            'url': 'http://adventure.howstuffworks.com/39516-deadliest-catch-jakes-farewell-pots-video.htm',
+            'info_dict': {
+                'id': '553470',
+                'display_id': 'deadliest-catch-jakes-farewell-pots',
+                'ext': 'mp4',
+                'title': 'Deadliest Catch: Jake\'s Farewell Pots',
+                'description': 'md5:9632c346d5e43ee238028c9cefd8dbbc',
+                'thumbnail': 're:^https?://.*\.jpg$',
+            },
+            'params': {
+                # md5 is not consistent
+                'skip_download': True
+            }
+        },
+        {
+            'url': 'http://entertainment.howstuffworks.com/arts/2706-sword-swallowing-1-by-dan-meyer-video.htm',
+            'info_dict': {
+                'id': '440011',
+                'display_id': 'sword-swallowing-1-by-dan-meyer',
+                'ext': 'flv',
+                'title': 'Sword Swallowing #1 by Dan Meyer',
+                'description': 'md5:b2409e88172913e2e7d3d1159b0ef735',
+                'thumbnail': 're:^https?://.*\.jpg$',
+            },
+            'params': {
+                # md5 is not consistent
+                'skip_download': True
+            }
+        },
+    ]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        display_id = mobj.group('id')
+        webpage = self._download_webpage(url, display_id)
+
+        content_id = self._search_regex(r'var siteSectionId="(\d+)";', webpage, 'content id')
+
+        mp4 = self._search_regex(
+            r'''(?xs)var\s+clip\s*=\s*{\s*
+                .+?\s*
+                content_id\s*:\s*%s\s*,\s*
+                .+?\s*
+                mp4\s*:\s*\[(.*?),?\]\s*
+                };\s*
+                videoData\.push\(clip\);''' % content_id,
+            webpage, 'mp4', fatal=False, default=None)
+
+        smil = self._download_xml(
+            'http://services.media.howstuffworks.com/videos/%s/smil-service.smil' % content_id,
+            content_id, 'Downloading video SMIL')
+
+        http_base = find_xpath_attr(
+            smil,
+            './{0}head/{0}meta'.format('{http://www.w3.org/2001/SMIL20/Language}'),
+            'name',
+            'httpBase').get('content')
+
+        def random_string(str_len=0):
+            return ''.join([random.choice(string.ascii_uppercase) for _ in range(str_len)])
+
+        URL_SUFFIX = '?v=2.11.3&fp=LNX 11,2,202,356&r=%s&g=%s' % (random_string(5), random_string(12))
+
+        formats = []
+
+        if mp4:
+            for video in json.loads('[%s]' % mp4):
+                bitrate = video['bitrate']
+                fmt = {
+                    'url': video['src'].replace('http://pmd.video.howstuffworks.com', http_base) + URL_SUFFIX,
+                    'format_id': bitrate,
+                }
+                m = re.search(r'(?P<vbr>\d+)[Kk]', bitrate)
+                if m:
+                    fmt['vbr'] = int(m.group('vbr'))
+                formats.append(fmt)
+        else:
+            for video in smil.findall(
+                    './/{0}body/{0}switch/{0}video'.format('{http://www.w3.org/2001/SMIL20/Language}')):
+                vbr = int(video.attrib['system-bitrate']) / 1000
+                formats.append({
+                    'url': '%s/%s%s' % (http_base, video.attrib['src'], URL_SUFFIX),
+                    'format_id': '%dk' % vbr,
+                    'vbr': vbr,
+                })
+
+        self._sort_formats(formats)
+
+        title = self._og_search_title(webpage)
+        TITLE_SUFFIX = ' : HowStuffWorks'
+        if title.endswith(TITLE_SUFFIX):
+            title = title[:-len(TITLE_SUFFIX)]
+
+        description = self._og_search_description(webpage)
+        thumbnail = self._og_search_thumbnail(webpage)
+
+        return {
+            'id': content_id,
+            'display_id': display_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'formats': formats,
+        }
diff --git a/youtube_dl/extractor/izlesene.py b/youtube_dl/extractor/izlesene.py
new file mode 100644 (file)
index 0000000..79e8430
--- /dev/null
@@ -0,0 +1,97 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    get_element_by_id,
+    parse_iso8601,
+    determine_ext,
+    int_or_none,
+    str_to_int,
+)
+
+
+class IzleseneIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:(?:www|m)\.)?izlesene\.com/(?:video|embedplayer)/(?:[^/]+/)?(?P<id>[0-9]+)'
+    _STREAM_URL = 'http://panel.izlesene.com/api/streamurl/{id:}/{format:}'
+    _TEST = {
+        'url': 'http://www.izlesene.com/video/sevincten-cildirtan-dogum-gunu-hediyesi/7599694',
+        'md5': '4384f9f0ea65086734b881085ee05ac2',
+        'info_dict': {
+            'id': '7599694',
+            'ext': 'mp4',
+            'title': 'Sevinçten Çıldırtan Doğum Günü Hediyesi',
+            'description': 'Annesi oğluna doğum günü hediyesi olarak minecraft cd si alıyor, ve çocuk hunharca seviniyor',
+            'thumbnail': 're:^http://.*\.jpg',
+            'uploader_id': 'pelikzzle',
+            'timestamp': 1404298698,
+            'upload_date': '20140702',
+            'duration': 95.395,
+            'age_limit': 0,
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        url = 'http://www.izlesene.com/video/%s' % video_id
+
+        webpage = self._download_webpage(url, video_id)
+
+        title = self._og_search_title(webpage)
+        description = self._og_search_description(webpage)
+        thumbnail = self._og_search_thumbnail(webpage)
+
+        uploader = self._html_search_regex(
+            r"adduserUsername\s*=\s*'([^']+)';", webpage, 'uploader', fatal=False, default='')
+        timestamp = parse_iso8601(self._html_search_meta(
+            'uploadDate', webpage, 'upload date', fatal=False))
+
+        duration = int_or_none(self._html_search_regex(
+            r'"videoduration"\s*:\s*"([^"]+)"', webpage, 'duration', fatal=False))
+        if duration:
+            duration /= 1000.0
+
+        view_count = str_to_int(get_element_by_id('videoViewCount', webpage))
+        comment_count = self._html_search_regex(
+            r'comment_count\s*=\s*\'([^\']+)\';', webpage, 'comment count', fatal=False)
+
+        family_friendly = self._html_search_meta(
+            'isFamilyFriendly', webpage, 'age limit', fatal=False)
+
+        content_url = self._html_search_meta(
+            'contentURL', webpage, 'content URL', fatal=False)
+        ext = determine_ext(content_url, 'mp4')
+
+        # Might be empty for some videos.
+        qualities = self._html_search_regex(
+            r'"quality"\s*:\s*"([^"]+)"', webpage, 'qualities', fatal=False, default='')
+
+        formats = []
+        for quality in qualities.split('|'):
+            json = self._download_json(
+                self._STREAM_URL.format(id=video_id, format=quality), video_id,
+                note='Getting video URL for "%s" quality' % quality,
+                errnote='Failed to get video URL for "%s" quality' % quality
+            )
+            formats.append({
+                'url': json.get('streamurl'),
+                'ext': ext,
+                'format_id': '%sp' % quality if quality else 'sd',
+            })
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'uploader_id': uploader,
+            'timestamp': timestamp,
+            'duration': duration,
+            'view_count': int_or_none(view_count),
+            'comment_count': int_or_none(comment_count),
+            'age_limit': 18 if family_friendly == 'False' else 0,
+            'formats': formats,
+        }
diff --git a/youtube_dl/extractor/jove.py b/youtube_dl/extractor/jove.py
new file mode 100644 (file)
index 0000000..cf73cd7
--- /dev/null
@@ -0,0 +1,80 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    unified_strdate
+)
+
+
+class JoveIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?jove\.com/video/(?P<id>[0-9]+)'
+    _CHAPTERS_URL = 'http://www.jove.com/video-chapters?videoid={video_id:}'
+    _TESTS = [
+        {
+            'url': 'http://www.jove.com/video/2744/electrode-positioning-montage-transcranial-direct-current',
+            'md5': '93723888d82dbd6ba8b3d7d0cd65dd2b',
+            'info_dict': {
+                'id': '2744',
+                'ext': 'mp4',
+                'title': 'Electrode Positioning and Montage in Transcranial Direct Current Stimulation',
+                'description': 'md5:015dd4509649c0908bc27f049e0262c6',
+                'thumbnail': 're:^https?://.*\.png$',
+                'upload_date': '20110523',
+            }
+        },
+        {
+            'url': 'http://www.jove.com/video/51796/culturing-caenorhabditis-elegans-axenic-liquid-media-creation',
+            'md5': '914aeb356f416811d911996434811beb',
+            'info_dict': {
+                'id': '51796',
+                'ext': 'mp4',
+                'title': 'Culturing Caenorhabditis elegans in Axenic Liquid Media and Creation of Transgenic Worms by Microparticle Bombardment',
+                'description': 'md5:35ff029261900583970c4023b70f1dc9',
+                'thumbnail': 're:^https?://.*\.png$',
+                'upload_date': '20140802',
+            }
+        },
+
+    ]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        webpage = self._download_webpage(url, video_id)
+
+        chapters_id = self._html_search_regex(
+            r'/video-chapters\?videoid=([0-9]+)', webpage, 'chapters id')
+
+        chapters_xml = self._download_xml(
+            self._CHAPTERS_URL.format(video_id=chapters_id),
+            video_id, note='Downloading chapters XML',
+            errnote='Failed to download chapters XML')
+
+        video_url = chapters_xml.attrib.get('video')
+        if not video_url:
+            raise ExtractorError('Failed to get the video URL')
+
+        title = self._html_search_meta('citation_title', webpage, 'title')
+        thumbnail = self._og_search_thumbnail(webpage)
+        description = self._html_search_regex(
+            r'<div id="section_body_summary"><p class="jove_content">(.+?)</p>',
+            webpage, 'description', fatal=False)
+        publish_date = unified_strdate(self._html_search_meta(
+            'citation_publication_date', webpage, 'publish date', fatal=False))
+        comment_count = self._html_search_regex(
+            r'<meta name="num_comments" content="(\d+) Comments?"',
+            webpage, 'comment count', fatal=False)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'url': video_url,
+            'thumbnail': thumbnail,
+            'description': description,
+            'upload_date': publish_date,
+            'comment_count': comment_count,
+        }
index 7083db12ea012720f5dfda7039fdad9e21c12cc9..27017e89f632880c21643c0b58f04d23e963fd39 100644 (file)
@@ -1,5 +1,6 @@
 from __future__ import unicode_literals
 
+import itertools
 import json
 import os
 import re
@@ -43,10 +44,11 @@ class JustinTVIE(InfoExtractor):
     }
 
     # Return count of items, list of *valid* items
-    def _parse_page(self, url, video_id):
-        info_json = self._download_webpage(url, video_id,
-                                           'Downloading video info JSON',
-                                           'unable to download video info JSON')
+    def _parse_page(self, url, video_id, counter):
+        info_json = self._download_webpage(
+            url, video_id,
+            'Downloading video info JSON on page %d' % counter,
+            'Unable to download video info JSON %d' % counter)
 
         response = json.loads(info_json)
         if type(response) != list:
@@ -138,11 +140,10 @@ class JustinTVIE(InfoExtractor):
         entries = []
         offset = 0
         limit = self._JUSTIN_PAGE_LIMIT
-        while True:
-            if paged:
-                self.report_download_page(video_id, offset)
+        for counter in itertools.count(1):
             page_url = api + ('?offset=%d&limit=%d' % (offset, limit))
-            page_count, page_info = self._parse_page(page_url, video_id)
+            page_count, page_info = self._parse_page(
+                page_url, video_id, counter)
             entries.extend(page_info)
             if not paged or page_count != limit:
                 break
diff --git a/youtube_dl/extractor/krasview.py b/youtube_dl/extractor/krasview.py
new file mode 100644 (file)
index 0000000..6f3d234
--- /dev/null
@@ -0,0 +1,59 @@
+# encoding: utf-8
+from __future__ import unicode_literals
+
+import json
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    int_or_none,
+    unescapeHTML,
+)
+
+
+class KrasViewIE(InfoExtractor):
+    IE_DESC = 'Красвью'
+    _VALID_URL = r'https?://krasview\.ru/video/(?P<id>\d+)'
+
+    _TEST = {
+        'url': 'http://krasview.ru/video/512228',
+        'md5': '3b91003cf85fc5db277870c8ebd98eae',
+        'info_dict': {
+            'id': '512228',
+            'ext': 'mp4',
+            'title': 'Снег, лёд, заносы',
+            'description': 'Снято в городе Нягань, в Ханты-Мансийском автономном округе.',
+            'duration': 27,
+            'thumbnail': 're:^https?://.*\.jpg',
+        },
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        webpage = self._download_webpage(url, video_id)
+
+        flashvars = json.loads(self._search_regex(
+            r'flashvars\s*:\s*({.+?})\s*}\);', webpage, 'flashvars'))
+
+        video_url = flashvars['url']
+        title = unescapeHTML(flashvars['title'])
+        description = unescapeHTML(flashvars.get('subtitle') or self._og_search_description(webpage, default=None))
+        thumbnail = flashvars['image']
+        duration = int(flashvars['duration'])
+        filesize = int(flashvars['size'])
+        width = int_or_none(self._og_search_property('video:width', webpage, 'video width'))
+        height = int_or_none(self._og_search_property('video:height', webpage, 'video height'))
+
+        return {
+            'id': video_id,
+            'url': video_url,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'duration': duration,
+            'filesize': filesize,
+            'width': width,
+            'height': height,
+        }
index 1ea1bbab4dc31123d8c45669315226bfe2fdda68..281a0ce4052eb986d7d4df6d10b8c29b36cab6d5 100644 (file)
@@ -5,11 +5,14 @@ import json
 
 from .common import InfoExtractor
 from ..utils import (
+    compat_str,
     compat_urllib_parse_urlparse,
     compat_urlparse,
-    xpath_with_ns,
-    compat_str,
+    ExtractorError,
+    find_xpath_attr,
+    int_or_none,
     orderedSet,
+    xpath_with_ns,
 )
 
 
@@ -24,20 +27,82 @@ class LivestreamIE(InfoExtractor):
             'ext': 'mp4',
             'title': 'Live from Webster Hall NYC',
             'upload_date': '20121012',
+            'like_count': int,
+            'view_count': int,
+            'thumbnail': 're:^http://.*\.jpg$'
         }
     }
 
+    def _parse_smil(self, video_id, smil_url):
+        formats = []
+        _SWITCH_XPATH = (
+            './/{http://www.w3.org/2001/SMIL20/Language}body/'
+            '{http://www.w3.org/2001/SMIL20/Language}switch')
+        smil_doc = self._download_xml(
+            smil_url, video_id,
+            note='Downloading SMIL information',
+            errnote='Unable to download SMIL information',
+            fatal=False)
+        if smil_doc is False:  # Download failed
+            return formats
+        title_node = find_xpath_attr(
+            smil_doc, './/{http://www.w3.org/2001/SMIL20/Language}meta',
+            'name', 'title')
+        if title_node is None:
+            self.report_warning('Cannot find SMIL id')
+            switch_node = smil_doc.find(_SWITCH_XPATH)
+        else:
+            title_id = title_node.attrib['content']
+            switch_node = find_xpath_attr(
+                smil_doc, _SWITCH_XPATH, 'id', title_id)
+        if switch_node is None:
+            raise ExtractorError('Cannot find switch node')
+        video_nodes = switch_node.findall(
+            '{http://www.w3.org/2001/SMIL20/Language}video')
+
+        for vn in video_nodes:
+            tbr = int_or_none(vn.attrib.get('system-bitrate'))
+            furl = (
+                'http://livestream-f.akamaihd.net/%s?v=3.0.3&fp=WIN%%2014,0,0,145' %
+                (vn.attrib['src']))
+            if 'clipBegin' in vn.attrib:
+                furl += '&ssek=' + vn.attrib['clipBegin']
+            formats.append({
+                'url': furl,
+                'format_id': 'smil_%d' % tbr,
+                'ext': 'flv',
+                'tbr': tbr,
+                'preference': -1000,
+            })
+        return formats
+
     def _extract_video_info(self, video_data):
-        video_url = (
-            video_data.get('progressive_url_hd') or
-            video_data.get('progressive_url')
+        video_id = compat_str(video_data['id'])
+
+        FORMAT_KEYS = (
+            ('sd', 'progressive_url'),
+            ('hd', 'progressive_url_hd'),
         )
+        formats = [{
+            'format_id': format_id,
+            'url': video_data[key],
+            'quality': i + 1,
+        } for i, (format_id, key) in enumerate(FORMAT_KEYS)
+            if video_data.get(key)]
+
+        smil_url = video_data.get('smil_url')
+        if smil_url:
+            formats.extend(self._parse_smil(video_id, smil_url))
+        self._sort_formats(formats)
+
         return {
-            'id': compat_str(video_data['id']),
-            'url': video_url,
+            'id': video_id,
+            'formats': formats,
             'title': video_data['caption'],
-            'thumbnail': video_data['thumbnail_url'],
+            'thumbnail': video_data.get('thumbnail_url'),
             'upload_date': video_data['updated_at'].replace('-', '')[:8],
+            'like_count': video_data.get('likes', {}).get('total'),
+            'view_count': video_data.get('views'),
         }
 
     def _real_extract(self, url):
@@ -46,17 +111,28 @@ class LivestreamIE(InfoExtractor):
         event_name = mobj.group('event_name')
         webpage = self._download_webpage(url, video_id or event_name)
 
-        if video_id is None:
-            # This is an event page:
+        og_video = self._og_search_video_url(webpage, 'player url', fatal=False, default=None)
+        if og_video is None:
             config_json = self._search_regex(
                 r'window.config = ({.*?});', webpage, 'window config')
             info = json.loads(config_json)['event']
+
+            def is_relevant(vdata, vid):
+                result = vdata['type'] == 'video'
+                if video_id is not None:
+                    result = result and compat_str(vdata['data']['id']) == vid
+                return result
+
             videos = [self._extract_video_info(video_data['data'])
-                for video_data in info['feed']['data']
-                if video_data['type'] == 'video']
-            return self.playlist_result(videos, info['id'], info['full_name'])
+                      for video_data in info['feed']['data']
+                      if is_relevant(video_data, video_id)]
+            if video_id is None:
+                # This is an event page:
+                return self.playlist_result(videos, info['id'], info['full_name'])
+            else:
+                if videos:
+                    return videos[0]
         else:
-            og_video = self._og_search_video_url(webpage, 'player url')
             query_str = compat_urllib_parse_urlparse(og_video).query
             query = compat_urlparse.parse_qs(query_str)
             api_url = query['play_url'][0].replace('.smil', '')
index 6436c05a3cd8e3f25499b9ff911de837a6c98207..1a896b536dd813a561cea8f870258bf73519e00b 100644 (file)
@@ -9,6 +9,7 @@ from ..utils import (
     compat_urllib_request,
     determine_ext,
     ExtractorError,
+    int_or_none,
 )
 
 
@@ -83,6 +84,21 @@ class MetacafeIE(InfoExtractor):
                 'skip_download': True,
             },
         },
+        # Movieclips.com video
+        {
+            'url': 'http://www.metacafe.com/watch/mv-Wy7ZU/my_week_with_marilyn_do_you_love_me/',
+            'info_dict': {
+                'id': 'mv-Wy7ZU',
+                'ext': 'mp4',
+                'title': 'My Week with Marilyn - Do You Love Me?',
+                'description': 'From the movie My Week with Marilyn - Colin (Eddie Redmayne) professes his love to Marilyn (Michelle Williams) and gets her to promise to return to set and finish the movie.',
+                'uploader': 'movie_trailers',
+                'duration': 176,
+            },
+            'params': {
+                'skip_download': 'requires rtmpdump',
+            }
+        }
     ]
 
     def report_disclaimer(self):
@@ -134,6 +150,7 @@ class MetacafeIE(InfoExtractor):
 
         # Extract URL, uploader and title from webpage
         self.report_extraction(video_id)
+        video_url = None
         mobj = re.search(r'(?m)&mediaURL=([^&]+)', webpage)
         if mobj is not None:
             mediaURL = compat_urllib_parse.unquote(mobj.group(1))
@@ -146,16 +163,17 @@ class MetacafeIE(InfoExtractor):
             else:
                 gdaKey = mobj.group(1)
                 video_url = '%s?__gda__=%s' % (mediaURL, gdaKey)
-        else:
+        if video_url is None:
             mobj = re.search(r'<video src="([^"]+)"', webpage)
             if mobj:
                 video_url = mobj.group(1)
                 video_ext = 'mp4'
-            else:
-                mobj = re.search(r' name="flashvars" value="(.*?)"', webpage)
-                if mobj is None:
-                    raise ExtractorError('Unable to extract media URL')
-                vardict = compat_parse_qs(mobj.group(1))
+        if video_url is None:
+            flashvars = self._search_regex(
+                r' name="flashvars" value="(.*?)"', webpage, 'flashvars',
+                default=None)
+            if flashvars:
+                vardict = compat_parse_qs(flashvars)
                 if 'mediaData' not in vardict:
                     raise ExtractorError('Unable to extract media URL')
                 mobj = re.search(
@@ -165,26 +183,68 @@ class MetacafeIE(InfoExtractor):
                 mediaURL = mobj.group('mediaURL').replace('\\/', '/')
                 video_url = '%s?__gda__=%s' % (mediaURL, mobj.group('key'))
                 video_ext = determine_ext(video_url)
-
-        video_title = self._html_search_regex(r'(?im)<title>(.*) - Video</title>', webpage, 'title')
+        if video_url is None:
+            player_url = self._search_regex(
+                r"swfobject\.embedSWF\('([^']+)'",
+                webpage, 'config URL', default=None)
+            if player_url:
+                config_url = self._search_regex(
+                    r'config=(.+)$', player_url, 'config URL')
+                config_doc = self._download_xml(
+                    config_url, video_id,
+                    note='Downloading video config')
+                smil_url = config_doc.find('.//properties').attrib['smil_file']
+                smil_doc = self._download_xml(
+                    smil_url, video_id,
+                    note='Downloading SMIL document')
+                base_url = smil_doc.find('./head/meta').attrib['base']
+                video_url = []
+                for vn in smil_doc.findall('.//video'):
+                    br = int(vn.attrib['system-bitrate'])
+                    play_path = vn.attrib['src']
+                    video_url.append({
+                        'format_id': 'smil-%d' % br,
+                        'url': base_url,
+                        'play_path': play_path,
+                        'page_url': url,
+                        'player_url': player_url,
+                        'ext': play_path.partition(':')[0],
+                    })
+
+        if video_url is None:
+            raise ExtractorError('Unsupported video type')
+
+        video_title = self._html_search_regex(
+            r'(?im)<title>(.*) - Video</title>', webpage, 'title')
         description = self._og_search_description(webpage)
         thumbnail = self._og_search_thumbnail(webpage)
         video_uploader = self._html_search_regex(
                 r'submitter=(.*?);|googletag\.pubads\(\)\.setTargeting\("(?:channel|submiter)","([^"]+)"\);',
                 webpage, 'uploader nickname', fatal=False)
+        duration = int_or_none(
+            self._html_search_meta('video:duration', webpage))
+
+        age_limit = (
+            18
+            if re.search(r'"contentRating":"restricted"', webpage)
+            else 0)
 
-        if re.search(r'"contentRating":"restricted"', webpage) is not None:
-            age_limit = 18
+        if isinstance(video_url, list):
+            formats = video_url
         else:
-            age_limit = 0
+            formats = [{
+                'url': video_url,
+                'ext': video_ext,
+            }]
 
+        self._sort_formats(formats)
         return {
             'id': video_id,
-            'url': video_url,
             'description': description,
             'uploader': video_uploader,
             'title': video_title,
-            'thumbnail':thumbnail,
-            'ext': video_ext,
+            'thumbnail': thumbnail,
             'age_limit': age_limit,
+            'formats': formats,
+            'duration': duration,
         }
diff --git a/youtube_dl/extractor/mitele.py b/youtube_dl/extractor/mitele.py
new file mode 100644 (file)
index 0000000..979f3d6
--- /dev/null
@@ -0,0 +1,60 @@
+from __future__ import unicode_literals
+
+import re
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+    compat_urllib_parse,
+    get_element_by_attribute,
+    parse_duration,
+    strip_jsonp,
+)
+
+
+class MiTeleIE(InfoExtractor):
+    IE_NAME = 'mitele.es'
+    _VALID_URL = r'http://www\.mitele\.es/[^/]+/[^/]+/[^/]+/(?P<episode>[^/]+)/'
+
+    _TEST = {
+        'url': 'http://www.mitele.es/programas-tv/diario-de/la-redaccion/programa-144/',
+        'md5': '6a75fe9d0d3275bead0cb683c616fddb',
+        'info_dict': {
+            'id': '0fce117d',
+            'ext': 'mp4',
+            'title': 'Programa 144 - Tor, la web invisible',
+            'description': 'md5:3b6fce7eaa41b2d97358726378d9369f',
+            'display_id': 'programa-144',
+            'duration': 2913,
+        },
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        episode = mobj.group('episode')
+        webpage = self._download_webpage(url, episode)
+        embed_data_json = self._search_regex(
+            r'MSV\.embedData\[.*?\]\s*=\s*({.*?});', webpage, 'embed data',
+            flags=re.DOTALL
+        ).replace('\'', '"')
+        embed_data = json.loads(embed_data_json)
+
+        info_url = embed_data['flashvars']['host']
+        info_el = self._download_xml(info_url, episode).find('./video/info')
+
+        video_link = info_el.find('videoUrl/link').text
+        token_query = compat_urllib_parse.urlencode({'id': video_link})
+        token_info = self._download_json(
+            'http://token.mitele.es/?' + token_query, episode,
+            transform_source=strip_jsonp
+        )
+
+        return {
+            'id': embed_data['videoId'],
+            'display_id': episode,
+            'title': info_el.find('title').text,
+            'url': token_info['tokenizedUrl'],
+            'description': get_element_by_attribute('class', 'text', webpage),
+            'thumbnail': info_el.find('thumb').text,
+            'duration': parse_duration(info_el.find('duration').text),
+        }
diff --git a/youtube_dl/extractor/mojvideo.py b/youtube_dl/extractor/mojvideo.py
new file mode 100644 (file)
index 0000000..90b460d
--- /dev/null
@@ -0,0 +1,58 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    parse_duration,
+)
+
+
+class MojvideoIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?mojvideo\.com/video-(?P<display_id>[^/]+)/(?P<id>[a-f0-9]+)'
+    _TEST = {
+        'url': 'http://www.mojvideo.com/video-v-avtu-pred-mano-rdecelaska-alfi-nipic/3d1ed4497707730b2906',
+        'md5': 'f7fd662cc8ce2be107b0d4f2c0483ae7',
+        'info_dict': {
+            'id': '3d1ed4497707730b2906',
+            'display_id': 'v-avtu-pred-mano-rdecelaska-alfi-nipic',
+            'ext': 'mp4',
+            'title': 'V avtu pred mano rdečelaska - Alfi Nipič',
+            'thumbnail': 're:^http://.*\.jpg$',
+            'duration': 242,
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        display_id = mobj.group('display_id')
+
+        # XML is malformed
+        playerapi = self._download_webpage(
+            'http://www.mojvideo.com/playerapi.php?v=%s&t=1' % video_id, display_id)
+
+        if '<error>true</error>' in playerapi:
+            error_desc = self._html_search_regex(
+                r'<errordesc>([^<]*)</errordesc>', playerapi, 'error description', fatal=False)
+            raise ExtractorError('%s said: %s' % (self.IE_NAME, error_desc), expected=True)
+
+        title = self._html_search_regex(
+            r'<title>([^<]+)</title>', playerapi, 'title')
+        video_url = self._html_search_regex(
+            r'<file>([^<]+)</file>', playerapi, 'video URL')
+        thumbnail = self._html_search_regex(
+            r'<preview>([^<]+)</preview>', playerapi, 'thumbnail', fatal=False)
+        duration = parse_duration(self._html_search_regex(
+            r'<duration>([^<]+)</duration>', playerapi, 'duration', fatal=False))
+
+        return {
+            'id': video_id,
+            'display_id': display_id,
+            'url': video_url,
+            'title': title,
+            'thumbnail': thumbnail,
+            'duration': duration,
+        }
\ No newline at end of file
index 1c5e9401f36c72a73a701bdffc89529979a1eaaf..6b2f3f55a60d19ff3b4735027a399b6c38ad1310 100644 (file)
@@ -1,3 +1,4 @@
+# encoding: utf-8
 from __future__ import unicode_literals
 
 import re
@@ -8,19 +9,34 @@ from ..utils import ExtractorError
 
 
 class NownessIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?nowness\.com/[^?#]*?/(?P<id>[0-9]+)/(?P<slug>[^/]+?)(?:$|[?#])'
-
-    _TEST = {
-        'url': 'http://www.nowness.com/day/2013/6/27/3131/candor--the-art-of-gesticulation',
-        'md5': '068bc0202558c2e391924cb8cc470676',
-        'info_dict': {
-            'id': '2520295746001',
-            'ext': 'mp4',
-            'description': 'Candor: The Art of Gesticulation',
-            'uploader': 'Nowness',
-            'title': 'Candor: The Art of Gesticulation',
-        }
-    }
+    _VALID_URL = r'https?://(?:(?:www|cn)\.)?nowness\.com/[^?#]*?/(?P<id>[0-9]+)/(?P<slug>[^/]+?)(?:$|[?#])'
+
+    _TESTS = [
+        {
+            'url': 'http://www.nowness.com/day/2013/6/27/3131/candor--the-art-of-gesticulation',
+            'md5': '068bc0202558c2e391924cb8cc470676',
+            'info_dict': {
+                'id': '2520295746001',
+                'ext': 'mp4',
+                'title': 'Candor: The Art of Gesticulation',
+                'description': 'Candor: The Art of Gesticulation',
+                'thumbnail': 're:^https?://.*\.jpg',
+                'uploader': 'Nowness',
+            }
+        },
+        {
+            'url': 'http://cn.nowness.com/day/2014/8/7/4069/kasper-bj-rke-ft-jaakko-eino-kalevi--tnr',
+            'md5': 'e79cf125e387216f86b2e0a5b5c63aa3',
+            'info_dict': {
+                'id': '3716354522001',
+                'ext': 'mp4',
+                'title': 'Kasper Bjørke ft. Jaakko Eino Kalevi: TNR',
+                'description': 'Kasper Bjørke ft. Jaakko Eino Kalevi: TNR',
+                'thumbnail': 're:^https?://.*\.jpg',
+                'uploader': 'Nowness',
+            }
+        },
+    ]
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
diff --git a/youtube_dl/extractor/oe1.py b/youtube_dl/extractor/oe1.py
deleted file mode 100644 (file)
index 38971ab..0000000
+++ /dev/null
@@ -1,40 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import calendar
-import datetime
-import re
-
-from .common import InfoExtractor
-
-# audios on oe1.orf.at are only available for 7 days, so we can't
-# add tests.
-
-
-class OE1IE(InfoExtractor):
-    IE_DESC = 'oe1.orf.at'
-    _VALID_URL = r'http://oe1\.orf\.at/programm/(?P<id>[0-9]+)'
-
-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        show_id = mobj.group('id')
-
-        data = self._download_json(
-            'http://oe1.orf.at/programm/%s/konsole' % show_id,
-            show_id
-        )
-
-        timestamp = datetime.datetime.strptime('%s %s' % (
-            data['item']['day_label'],
-            data['item']['time']
-        ), '%d.%m.%Y %H:%M')
-        unix_timestamp = calendar.timegm(timestamp.utctimetuple())
-
-        return {
-            'id': show_id,
-            'title': data['item']['title'],
-            'url': data['item']['url_stream'],
-            'ext': 'mp3',
-            'description': data['item'].get('info'),
-            'timestamp': unix_timestamp
-        }
index 13f12824c99aa71c357047ff62a866365bbc49fb..2044e107eba9808bbde802e8468bf6b009841fb8 100644 (file)
@@ -3,23 +3,38 @@ import re
 import json
 
 from .common import InfoExtractor
-from ..utils import unescapeHTML
+from ..utils import (
+    unescapeHTML,
+    ExtractorError,
+)
 
 
 class OoyalaIE(InfoExtractor):
     _VALID_URL = r'(?:ooyala:|https?://.+?\.ooyala\.com/.*?(?:embedCode|ec)=)(?P<id>.+?)(&|$)'
 
-    _TEST = {
-        # From http://it.slashdot.org/story/13/04/25/178216/recovering-data-from-broken-hard-drives-and-ssds-video
-        'url': 'http://player.ooyala.com/player.js?embedCode=pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8',
-        'md5': '3f5cceb3a7bf461d6c29dc466cf8033c',
-        'info_dict': {
-            'id': 'pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8',
-            'ext': 'mp4',
-            'title': 'Explaining Data Recovery from Hard Drives and SSDs',
-            'description': 'How badly damaged does a drive have to be to defeat Russell and his crew? Apparently, smashed to bits.',
+    _TESTS = [
+        {
+            # From http://it.slashdot.org/story/13/04/25/178216/recovering-data-from-broken-hard-drives-and-ssds-video
+            'url': 'http://player.ooyala.com/player.js?embedCode=pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8',
+            'md5': '3f5cceb3a7bf461d6c29dc466cf8033c',
+            'info_dict': {
+                'id': 'pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8',
+                'ext': 'mp4',
+                'title': 'Explaining Data Recovery from Hard Drives and SSDs',
+                'description': 'How badly damaged does a drive have to be to defeat Russell and his crew? Apparently, smashed to bits.',
+            },
+        }, {
+            # Only available for ipad
+            'url': 'http://player.ooyala.com/player.js?embedCode=x1b3lqZDq9y_7kMyC2Op5qo-p077tXD0',
+            'md5': '4b9754921fddb68106e48c142e2a01e6',
+            'info_dict': {
+                'id': 'x1b3lqZDq9y_7kMyC2Op5qo-p077tXD0',
+                'ext': 'mp4',
+                'title': 'Simulation Overview - Levels of Simulation',
+                'description': '',
+            },
         },
-    }
+    ]
 
     @staticmethod
     def _url_for_embed_code(embed_code):
@@ -47,13 +62,30 @@ class OoyalaIE(InfoExtractor):
         player = self._download_webpage(player_url, embedCode)
         mobile_url = self._search_regex(r'mobile_player_url="(.+?)&device="',
                                         player, 'mobile player url')
-        mobile_player = self._download_webpage(mobile_url, embedCode)
-        videos_info = self._search_regex(
-            r'var streams=window.oo_testEnv\?\[\]:eval\("\((\[{.*?}\])\)"\);',
-            mobile_player, 'info').replace('\\"','"')
-        videos_more_info = self._search_regex(r'eval\("\(({.*?\\"promo\\".*?})\)"', mobile_player, 'more info').replace('\\"','"')
+        # Looks like some videos are only available for particular devices
+        # (e.g. http://player.ooyala.com/player.js?embedCode=x1b3lqZDq9y_7kMyC2Op5qo-p077tXD0
+        # is only available for ipad)
+        # Working around with fetching URLs for all the devices found starting with 'unknown'
+        # until we succeed or eventually fail for each device.
+        devices = re.findall(r'device\s*=\s*"([^"]+)";', player)
+        devices.remove('unknown')
+        devices.insert(0, 'unknown')
+        for device in devices:
+            mobile_player = self._download_webpage(
+                '%s&device=%s' % (mobile_url, device), embedCode,
+                'Downloading mobile player JS for %s device' % device)
+            videos_info = self._search_regex(
+                r'var streams=window.oo_testEnv\?\[\]:eval\("\((\[{.*?}\])\)"\);',
+                mobile_player, 'info', fatal=False, default=None)
+            if videos_info:
+                break
+        if not videos_info:
+            raise ExtractorError('Unable to extract info')
+        videos_info = videos_info.replace('\\"', '"')
+        videos_more_info = self._search_regex(
+            r'eval\("\(({.*?\\"promo\\".*?})\)"', mobile_player, 'more info').replace('\\"', '"')
         videos_info = json.loads(videos_info)
-        videos_more_info =json.loads(videos_more_info)
+        videos_more_info = json.loads(videos_more_info)
 
         if videos_more_info.get('lineup'):
             videos = [self._extract_result(info, more_info) for (info, more_info) in zip(videos_info, videos_more_info['lineup'])]
index 03421d1d5c78f2acd712e560ae17fb96d4a323be..011e6be13e63562dad8def87ea264a7e1b6783af 100644 (file)
@@ -3,6 +3,8 @@ from __future__ import unicode_literals
 
 import json
 import re
+import calendar
+import datetime
 
 from .common import InfoExtractor
 from ..utils import (
@@ -12,7 +14,9 @@ from ..utils import (
 )
 
 
-class ORFIE(InfoExtractor):
+class ORFTVthekIE(InfoExtractor):
+    IE_NAME = 'orf:tvthek'
+    IE_DESC = 'ORF TVthek'
     _VALID_URL = r'https?://tvthek\.orf\.at/(?:programs/.+?/episodes|topics/.+?|program/[^/]+)/(?P<id>\d+)'
 
     _TEST = {
@@ -105,3 +109,73 @@ class ORFIE(InfoExtractor):
             'entries': entries,
             'id': playlist_id,
         }
+
+
+# Audios on ORF radio are only available for 7 days, so we can't add tests.
+
+
+class ORFOE1IE(InfoExtractor):
+    IE_NAME = 'orf:oe1'
+    IE_DESC = 'Radio Österreich 1'
+    _VALID_URL = r'http://oe1\.orf\.at/programm/(?P<id>[0-9]+)'
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        show_id = mobj.group('id')
+
+        data = self._download_json(
+            'http://oe1.orf.at/programm/%s/konsole' % show_id,
+            show_id
+        )
+
+        timestamp = datetime.datetime.strptime('%s %s' % (
+            data['item']['day_label'],
+            data['item']['time']
+        ), '%d.%m.%Y %H:%M')
+        unix_timestamp = calendar.timegm(timestamp.utctimetuple())
+
+        return {
+            'id': show_id,
+            'title': data['item']['title'],
+            'url': data['item']['url_stream'],
+            'ext': 'mp3',
+            'description': data['item'].get('info'),
+            'timestamp': unix_timestamp
+        }
+
+
+class ORFFM4IE(InfoExtractor):
+    IE_NAME = 'orf:fm4'
+    IE_DESC = 'radio FM4'
+    _VALID_URL = r'http://fm4\.orf\.at/7tage/?#(?P<date>[0-9]+)/(?P<show>\w+)'
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        show_date = mobj.group('date')
+        show_id = mobj.group('show')
+
+        data = self._download_json(
+            'http://audioapi.orf.at/fm4/json/2.0/broadcasts/%s/4%s' % (show_date, show_id),
+            show_id
+        )
+
+        def extract_entry_dict(info, title, subtitle):
+            return {
+                'id': info['loopStreamId'].replace('.mp3', ''),
+                'url': 'http://loopstream01.apa.at/?channel=fm4&id=%s' % info['loopStreamId'],
+                'title': title,
+                'description': subtitle,
+                'duration': (info['end'] - info['start']) / 1000,
+                'timestamp': info['start'] / 1000,
+                'ext': 'mp3'
+            }
+
+        entries = [extract_entry_dict(t, data['title'], data['subtitle']) for t in data['streams']]
+
+        return {
+            '_type': 'playlist',
+            'id': show_id,
+            'title': data['title'],
+            'description': data['subtitle'],
+            'entries': entries
+        }
\ No newline at end of file
diff --git a/youtube_dl/extractor/patreon.py b/youtube_dl/extractor/patreon.py
new file mode 100644 (file)
index 0000000..707a54e
--- /dev/null
@@ -0,0 +1,101 @@
+# encoding: utf-8
+from __future__ import unicode_literals
+
+import json
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    compat_urlparse,
+    js_to_json,
+)
+
+
+class PatreonIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?patreon\.com/creation\?hid=(.+)'
+    _TESTS = [
+        {
+            'url': 'http://www.patreon.com/creation?hid=743933',
+            'md5': 'e25505eec1053a6e6813b8ed369875cc',
+            'info_dict': {
+                'id': '743933',
+                'ext': 'mp3',
+                'title': 'Episode 166: David Smalley of Dogma Debate',
+                'uploader': 'Cognitive Dissonance Podcast',
+                'thumbnail': 're:^https?://.*$',
+            },
+        },
+        {
+            'url': 'http://www.patreon.com/creation?hid=754133',
+            'md5': '3eb09345bf44bf60451b8b0b81759d0a',
+            'info_dict': {
+                'id': '754133',
+                'ext': 'mp3',
+                'title': 'CD 167 Extra',
+                'uploader': 'Cognitive Dissonance Podcast',
+                'thumbnail': 're:^https?://.*$',
+            },
+        },
+    ]
+
+    # Currently Patreon exposes download URL via hidden CSS, so login is not
+    # needed. Keeping this commented for when this inevitably changes.
+    '''
+    def _login(self):
+        (username, password) = self._get_login_info()
+        if username is None:
+            return
+
+        login_form = {
+            'redirectUrl': 'http://www.patreon.com/',
+            'email': username,
+            'password': password,
+        }
+
+        request = compat_urllib_request.Request(
+            'https://www.patreon.com/processLogin',
+            compat_urllib_parse.urlencode(login_form).encode('utf-8')
+        )
+        login_page = self._download_webpage(request, None, note='Logging in as %s' % username)
+
+        if re.search(r'onLoginFailed', login_page):
+            raise ExtractorError('Unable to login, incorrect username and/or password', expected=True)
+
+    def _real_initialize(self):
+        self._login()
+    '''
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group(1)
+
+        webpage = self._download_webpage(url, video_id)
+        title = self._og_search_title(webpage).strip()
+
+        attach_fn = self._html_search_regex(
+            r'<div class="attach"><a target="_blank" href="([^"]+)">',
+            webpage, 'attachment URL', default=None)
+        if attach_fn is not None:
+            video_url = 'http://www.patreon.com' + attach_fn
+            thumbnail = self._og_search_thumbnail(webpage)
+            uploader = self._html_search_regex(
+                r'<strong>(.*?)</strong> is creating', webpage, 'uploader')
+        else:
+            playlist_js = self._search_regex(
+                r'(?s)new\s+jPlayerPlaylist\(\s*\{\s*[^}]*},\s*(\[.*?,?\s*\])',
+                webpage, 'playlist JSON')
+            playlist_json = js_to_json(playlist_js)
+            playlist = json.loads(playlist_json)
+            data = playlist[0]
+            video_url = self._proto_relative_url(data['mp3'])
+            thumbnail = self._proto_relative_url(data.get('cover'))
+            uploader = data.get('artist')
+
+        return {
+            'id': video_id,
+            'url': video_url,
+            'ext': 'mp3',
+            'title': title,
+            'uploader': uploader,
+            'thumbnail': thumbnail,
+        }
index 64cded70789249746a5e2b6604d86563a6ad499c..dee4af6f149eebfd949996199e69586233c67078 100644 (file)
@@ -20,25 +20,60 @@ class PBSIE(InfoExtractor):
         )
     '''
 
-    _TEST = {
-        'url': 'http://www.pbs.org/tpt/constitution-usa-peter-sagal/watch/a-more-perfect-union/',
-        'md5': 'ce1888486f0908d555a8093cac9a7362',
-        'info_dict': {
-            'id': '2365006249',
-            'ext': 'mp4',
-            'title': 'A More Perfect Union',
-            'description': 'md5:ba0c207295339c8d6eced00b7c363c6a',
-            'duration': 3190,
+    _TESTS = [
+        {
+            'url': 'http://www.pbs.org/tpt/constitution-usa-peter-sagal/watch/a-more-perfect-union/',
+            'md5': 'ce1888486f0908d555a8093cac9a7362',
+            'info_dict': {
+                'id': '2365006249',
+                'ext': 'mp4',
+                'title': 'A More Perfect Union',
+                'description': 'md5:ba0c207295339c8d6eced00b7c363c6a',
+                'duration': 3190,
+            },
+        },
+        {
+            'url': 'http://www.pbs.org/wgbh/pages/frontline/losing-iraq/',
+            'md5': '143c98aa54a346738a3d78f54c925321',
+            'info_dict': {
+                'id': '2365297690',
+                'ext': 'mp4',
+                'title': 'Losing Iraq',
+                'description': 'md5:f5bfbefadf421e8bb8647602011caf8e',
+                'duration': 5050,
+            },
+        },
+        {
+            'url': 'http://www.pbs.org/newshour/bb/education-jan-june12-cyberschools_02-23/',
+            'md5': 'b19856d7f5351b17a5ab1dc6a64be633',
+            'info_dict': {
+                'id': '2201174722',
+                'ext': 'mp4',
+                'title': 'Cyber Schools Gain Popularity, but Quality Questions Persist',
+                'description': 'md5:5871c15cba347c1b3d28ac47a73c7c28',
+                'duration': 801,
+            },
         },
-    }
+    ]
 
-    def _real_extract(self, url):
+    def _extract_ids(self, url):
         mobj = re.match(self._VALID_URL, url)
 
         presumptive_id = mobj.group('presumptive_id')
         display_id = presumptive_id
         if presumptive_id:
             webpage = self._download_webpage(url, display_id)
+
+            MEDIA_ID_REGEXES = [
+                r"div\s*:\s*'videoembed'\s*,\s*mediaid\s*:\s*'(\d+)'",  # frontline video embed
+                r'class="coveplayerid">([^<]+)<',                       # coveplayer
+            ]
+
+            media_id = self._search_regex(
+                MEDIA_ID_REGEXES, webpage, 'media ID', fatal=False, default=None)
+            if media_id:
+                return media_id, presumptive_id
+
             url = self._search_regex(
                 r'<iframe\s+id=["\']partnerPlayer["\'].*?\s+src=["\'](.*?)["\']>',
                 webpage, 'player URL')
@@ -57,6 +92,11 @@ class PBSIE(InfoExtractor):
             video_id = mobj.group('id')
             display_id = video_id
 
+        return video_id, display_id
+
+    def _real_extract(self, url):
+        video_id, display_id = self._extract_ids(url)
+
         info_url = 'http://video.pbs.org/videoInfo/%s?format=json' % video_id
         info = self._download_json(info_url, display_id)
 
index 49cf427a1141ee6f8fd14aca490e088dedbe3d73..ec7e7df7bc1f7a6b8ffdb4fc46b24a9bf8cb5148 100644 (file)
@@ -1,23 +1,23 @@
 from __future__ import unicode_literals
 
 import re
-import time
 
 from .common import InfoExtractor
-from ..utils import strip_jsonp
+from ..utils import str_or_none
 
 
 class ReverbNationIE(InfoExtractor):
     _VALID_URL = r'^https?://(?:www\.)?reverbnation\.com/.*?/song/(?P<id>\d+).*?$'
     _TESTS = [{
         'url': 'http://www.reverbnation.com/alkilados/song/16965047-mona-lisa',
-        'file': '16965047.mp3',
         'md5': '3da12ebca28c67c111a7f8b262d3f7a7',
         'info_dict': {
+            "id": "16965047",
+            "ext": "mp3",
             "title": "MONA LISA",
             "uploader": "ALKILADOS",
-            "uploader_id": 216429,
-            "thumbnail": "//gp1.wac.edgecastcdn.net/802892/production_public/Photo/13761700/image/1366002176_AVATAR_MONA_LISA.jpg"
+            "uploader_id": "216429",
+            "thumbnail": "re:^https://gp1\.wac\.edgecastcdn\.net/.*?\.jpg$"
         },
     }]
 
@@ -26,10 +26,8 @@ class ReverbNationIE(InfoExtractor):
         song_id = mobj.group('id')
 
         api_res = self._download_json(
-            'https://api.reverbnation.com/song/%s?callback=api_response_5&_=%d'
-                % (song_id, int(time.time() * 1000)),
+            'https://api.reverbnation.com/song/%s' % song_id,
             song_id,
-            transform_source=strip_jsonp,
             note='Downloading information of song %s' % song_id
         )
 
@@ -38,8 +36,9 @@ class ReverbNationIE(InfoExtractor):
             'title': api_res.get('name'),
             'url': api_res.get('url'),
             'uploader': api_res.get('artist', {}).get('name'),
-            'uploader_id': api_res.get('artist', {}).get('id'),
-            'thumbnail': api_res.get('image', api_res.get('thumbnail')),
+            'uploader_id': str_or_none(api_res.get('artist', {}).get('id')),
+            'thumbnail': self._proto_relative_url(
+                api_res.get('image', api_res.get('thumbnail'))),
             'ext': 'mp3',
             'vcodec': 'none',
         }
diff --git a/youtube_dl/extractor/rtlnl.py b/youtube_dl/extractor/rtlnl.py
new file mode 100644 (file)
index 0000000..14928cd
--- /dev/null
@@ -0,0 +1,52 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class RtlXlIE(InfoExtractor):
+    IE_NAME = 'rtlxl.nl'
+    _VALID_URL = r'https?://www\.rtlxl\.nl/#!/[^/]+/(?P<uuid>[^/?]+)'
+
+    _TEST = {
+        'url': 'http://www.rtlxl.nl/#!/rtl-nieuws-132237/6e4203a6-0a5e-3596-8424-c599a59e0677',
+        'info_dict': {
+            'id': '6e4203a6-0a5e-3596-8424-c599a59e0677',
+            'ext': 'flv',
+            'title': 'RTL Nieuws - Laat',
+            'description': 'Dagelijks het laatste nieuws uit binnen- en '
+                'buitenland. Voor nog meer nieuws kunt u ook gebruikmaken van '
+                'onze mobiele apps.',
+            'timestamp': 1408051800,
+            'upload_date': '20140814',
+        },
+        'params': {
+            # We download the first bytes of the first fragment, it can't be
+            # processed by the f4m downloader beacuse it isn't complete
+            'skip_download': True,
+        },
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        uuid = mobj.group('uuid')
+
+        info = self._download_json(
+            'http://www.rtl.nl/system/s4m/vfd/version=2/uuid=%s/fmt=flash/' % uuid,
+            uuid)
+        meta = info['meta']
+        material = info['material'][0]
+        episode_info = info['episodes'][0]
+
+        f4m_url = 'http://manifest.us.rtl.nl' + material['videopath']
+        progname = info['abstracts'][0]['name']
+        subtitle = material['title'] or info['episodes'][0]['name']
+
+        return {
+            'id': uuid,
+            'title': '%s - %s' % (progname, subtitle),
+            'formats': self._extract_f4m_formats(f4m_url, uuid),
+            'timestamp': material['original_date'],
+            'description': episode_info['synopsis'],
+        }
diff --git a/youtube_dl/extractor/shared.py b/youtube_dl/extractor/shared.py
new file mode 100644 (file)
index 0000000..badba2a
--- /dev/null
@@ -0,0 +1,57 @@
+from __future__ import unicode_literals
+
+import re
+import base64
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    compat_urllib_request,
+    compat_urllib_parse,
+    int_or_none,
+)
+
+
+class SharedIE(InfoExtractor):
+    _VALID_URL = r'http://shared\.sx/(?P<id>[\da-z]{10})'
+
+    _TEST = {
+        'url': 'http://shared.sx/0060718775',
+        'md5': '106fefed92a8a2adb8c98e6a0652f49b',
+        'info_dict': {
+            'id': '0060718775',
+            'ext': 'mp4',
+            'title': 'Bmp4',
+        },
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        page = self._download_webpage(url, video_id)
+
+        if re.search(r'>File does not exist<', page) is not None:
+            raise ExtractorError('Video %s does not exist' % video_id, expected=True)
+
+        download_form = dict(re.findall(r'<input type="hidden" name="([^"]+)" value="([^"]*)"', page))
+
+        request = compat_urllib_request.Request(url, compat_urllib_parse.urlencode(download_form))
+        request.add_header('Content-Type', 'application/x-www-form-urlencoded')
+
+        video_page = self._download_webpage(request, video_id, 'Downloading video page')
+
+        video_url = self._html_search_regex(r'data-url="([^"]+)"', video_page, 'video URL')
+        title = base64.b64decode(self._html_search_meta('full:title', page, 'title')).decode('utf-8')
+        filesize = int_or_none(self._html_search_meta('full:size', page, 'file size', fatal=False))
+        thumbnail = self._html_search_regex(
+            r'data-poster="([^"]+)"', video_page, 'thumbnail', fatal=False, default=None)
+
+        return {
+            'id': video_id,
+            'url': video_url,
+            'ext': 'mp4',
+            'filesize': filesize,
+            'title': title,
+            'thumbnail': thumbnail,
+        }
\ No newline at end of file
index 9faf3a5e3f677ae8b00454c492f6ef2bf129d329..172def221e1277298dc355a2cfdbea3ae4f9fdce 100644 (file)
@@ -1,4 +1,6 @@
 # coding: utf-8
+from __future__ import unicode_literals
+
 import re
 import time
 
@@ -10,18 +12,18 @@ from ..utils import (
 
 
 class StreamcloudIE(InfoExtractor):
-    IE_NAME = u'streamcloud.eu'
+    IE_NAME = 'streamcloud.eu'
     _VALID_URL = r'https?://streamcloud\.eu/(?P<id>[a-zA-Z0-9_-]+)/(?P<fname>[^#?]*)\.html'
 
     _TEST = {
-        u'url': u'http://streamcloud.eu/skp9j99s4bpz/youtube-dl_test_video_____________-BaW_jenozKc.mp4.html',
-        u'file': u'skp9j99s4bpz.mp4',
-        u'md5': u'6bea4c7fa5daaacc2a946b7146286686',
-        u'info_dict': {
-            u'title': u'youtube-dl test video  \'/\\ ä ↭',
-            u'duration': 9,
+        'url': 'http://streamcloud.eu/skp9j99s4bpz/youtube-dl_test_video_____________-BaW_jenozKc.mp4.html',
+        'md5': '6bea4c7fa5daaacc2a946b7146286686',
+        'info_dict': {
+            'id': 'skp9j99s4bpz',
+            'ext': 'mp4',
+            'title': 'youtube-dl test video  \'/\\ ä ↭',
         },
-        u'skip': u'Only available from the EU'
+        'skip': 'Only available from the EU'
     }
 
     def _real_extract(self, url):
@@ -46,21 +48,17 @@ class StreamcloudIE(InfoExtractor):
         req = compat_urllib_request.Request(url, post, headers)
 
         webpage = self._download_webpage(
-            req, video_id, note=u'Downloading video page ...')
+            req, video_id, note='Downloading video page ...')
         title = self._html_search_regex(
-            r'<h1[^>]*>([^<]+)<', webpage, u'title')
+            r'<h1[^>]*>([^<]+)<', webpage, 'title')
         video_url = self._search_regex(
-            r'file:\s*"([^"]+)"', webpage, u'video URL')
-        duration_str = self._search_regex(
-            r'duration:\s*"?([0-9]+)"?', webpage, u'duration', fatal=False)
-        duration = None if duration_str is None else int(duration_str)
+            r'file:\s*"([^"]+)"', webpage, 'video URL')
         thumbnail = self._search_regex(
-            r'image:\s*"([^"]+)"', webpage, u'thumbnail URL', fatal=False)
+            r'image:\s*"([^"]+)"', webpage, 'thumbnail URL', fatal=False)
 
         return {
             'id': video_id,
             'title': title,
             'url': video_url,
-            'duration': duration,
             'thumbnail': thumbnail,
         }
index 6c688c5202804986b6a0d6d154cb986e18789073..5d9d703673265ca4a53a54f28e34494d570cb206 100644 (file)
@@ -8,7 +8,7 @@ from ..utils import parse_duration
 
 
 class SWRMediathekIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?swrmediathek\.de/player\.htm\?show=(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
+    _VALID_URL = r'https?://(?:www\.)?swrmediathek\.de/(?:content/)?player\.htm\?show=(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
 
     _TESTS = [{
         'url': 'http://swrmediathek.de/player.htm?show=849790d0-dab8-11e3-a953-0026b975f2e6',
@@ -52,6 +52,20 @@ class SWRMediathekIE(InfoExtractor):
             'uploader': 'SWR 2',
             'uploader_id': '284670',
         }
+    }, {
+        'url': 'http://swrmediathek.de/content/player.htm?show=52dc7e00-15c5-11e4-84bc-0026b975f2e6',
+        'md5': '881531487d0633080a8cc88d31ef896f',
+        'info_dict': {
+            'id': '52dc7e00-15c5-11e4-84bc-0026b975f2e6',
+            'ext': 'mp4',
+            'title': 'Familienspaß am Bodensee',
+            'description': 'md5:0b591225a32cfde7be1629ed49fe4315',
+            'thumbnail': 're:http://.*\.jpg',
+            'duration': 1784,
+            'upload_date': '20140727',
+            'uploader': 'SWR Fernsehen BW',
+            'uploader_id': '281130',
+        }
     }]
 
     def _real_extract(self, url):
index f8dd7e955ada5ce58fd04d668027587eda1b6c00..fa796ce72126610cda53db5378d926b44d72e526 100644 (file)
@@ -37,7 +37,7 @@ class TeamcocoIE(InfoExtractor):
         video_id = mobj.group("video_id")
         if not video_id:
             video_id = self._html_search_regex(
-                r'<article class="video" data-id="(\d+?)"',
+                r'data-node-id="(\d+?)"',
                 webpage, 'video id')
 
         data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id
diff --git a/youtube_dl/extractor/tvplay.py b/youtube_dl/extractor/tvplay.py
new file mode 100644 (file)
index 0000000..a56a7ab
--- /dev/null
@@ -0,0 +1,85 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    parse_iso8601,
+    qualities,
+)
+
+
+class TVPlayIE(InfoExtractor):
+    _VALID_URL = r'http://(?:www\.)?tvplay\.lv/parraides/[^/]+/(?P<id>\d+)'
+    _TESTS = [
+        {
+            'url': 'http://www.tvplay.lv/parraides/vinas-melo-labak/418113?autostart=true',
+            'info_dict': {
+                'id': '418113',
+                'ext': 'flv',
+                'title': 'Kādi ir īri? - Viņas melo labāk',
+                'description': 'Baiba apsmej īrus, kādi tie ir un ko viņi dara.',
+                'duration': 25,
+                'timestamp': 1406097056,
+                'upload_date': '20140723',
+            },
+            'params': {
+                # rtmp download
+                'skip_download': True,
+            },
+        },
+    ]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        video = self._download_json(
+            'http://playapi.mtgx.tv/v1/videos/%s' % video_id, video_id, 'Downloading video JSON')
+
+        if video['is_geo_blocked']:
+            raise ExtractorError(
+                'This content is not available in your country due to copyright reasons', expected=True)
+
+        streams = self._download_json(
+            'http://playapi.mtgx.tv/v1/videos/stream/%s' % video_id, video_id, 'Downloading streams JSON')
+
+        quality = qualities(['hls', 'medium', 'high'])
+        formats = []
+        for format_id, video_url in streams['streams'].items():
+            if not video_url:
+                continue
+            fmt = {
+                'format_id': format_id,
+                'preference': quality(format_id),
+            }
+            if video_url.startswith('rtmp'):
+                m = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>[^/]+))/(?P<playpath>.+)$', video_url)
+                if not m:
+                    continue
+                fmt.update({
+                    'ext': 'flv',
+                    'url': m.group('url'),
+                    'app': m.group('app'),
+                    'play_path': m.group('playpath'),
+                })
+            else:
+                fmt.update({
+                    'url': video_url,
+                })
+            formats.append(fmt)
+
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': video['title'],
+            'description': video['description'],
+            'duration': video['duration'],
+            'timestamp': parse_iso8601(video['created_at']),
+            'view_count': video['views']['total'],
+            'age_limit': video.get('age_limit', 0),
+            'formats': formats,
+        }
diff --git a/youtube_dl/extractor/ubu.py b/youtube_dl/extractor/ubu.py
new file mode 100644 (file)
index 0000000..0182d67
--- /dev/null
@@ -0,0 +1,56 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import int_or_none
+
+
+class UbuIE(InfoExtractor):
+    _VALID_URL = r'http://(?:www\.)?ubu\.com/film/(?P<id>[\da-z_-]+)\.html'
+    _TEST = {
+        'url': 'http://ubu.com/film/her_noise.html',
+        'md5': '8edd46ee8aa6b265fb5ed6cf05c36bc9',
+        'info_dict': {
+            'id': 'her_noise',
+            'ext': 'mp4',
+            'title': 'Her Noise - The Making Of (2007)',
+            'duration': 3600,
+        },
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        webpage = self._download_webpage(url, video_id)
+
+        title = self._html_search_regex(
+            r'<title>.+?Film &amp; Video: ([^<]+)</title>', webpage, 'title')
+
+        duration = int_or_none(self._html_search_regex(
+            r'Duration: (\d+) minutes', webpage, 'duration', fatal=False, default=None))
+        if duration:
+            duration *= 60
+
+        formats = []
+
+        FORMAT_REGEXES = [
+            ['sq', r"'flashvars'\s*,\s*'file=([^']+)'"],
+            ['hq', r'href="(http://ubumexico\.centro\.org\.mx/video/[^"]+)"']
+        ]
+
+        for format_id, format_regex in FORMAT_REGEXES:
+            m = re.search(format_regex, webpage)
+            if m:
+                formats.append({
+                    'url': m.group(1),
+                    'format_id': format_id,
+                })
+
+        return {
+            'id': video_id,
+            'title': title,
+            'duration': duration,
+            'formats': formats,
+        }
index eada13ce920b9f4e892f952242ef87bfac504600..d2ffd1b6ba893f2cb2cc50f00a3131a835dba97d 100644 (file)
@@ -177,6 +177,7 @@ class VevoIE(InfoExtractor):
             self._downloader.report_warning(
                 'Cannot download SMIL information, falling back to JSON ..')
 
+        self._sort_formats(formats)
         timestamp_ms = int(self._search_regex(
             r'/Date\((\d+)\)/', video_info['launchDate'], 'launch date'))
 
diff --git a/youtube_dl/extractor/vidme.py b/youtube_dl/extractor/vidme.py
new file mode 100644 (file)
index 0000000..5c89824
--- /dev/null
@@ -0,0 +1,68 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    int_or_none,
+    float_or_none,
+    str_to_int,
+)
+
+
+class VidmeIE(InfoExtractor):
+    _VALID_URL = r'https?://vid\.me/(?:e/)?(?P<id>[\da-zA-Z]+)'
+    _TEST = {
+        'url': 'https://vid.me/QNB',
+        'md5': 'f42d05e7149aeaec5c037b17e5d3dc82',
+        'info_dict': {
+            'id': 'QNB',
+            'ext': 'mp4',
+            'title': 'Fishing for piranha - the easy way',
+            'description': 'source: https://www.facebook.com/photo.php?v=312276045600871',
+            'duration': 119.92,
+            'timestamp': 1406313244,
+            'upload_date': '20140725',
+            'thumbnail': 're:^https?://.*\.jpg',
+        },
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        webpage = self._download_webpage(url, video_id)
+
+        video_url = self._html_search_regex(r'<source src="([^"]+)"', webpage, 'video URL')
+
+        title = self._og_search_title(webpage)
+        description = self._og_search_description(webpage, default='')
+        thumbnail = self._og_search_thumbnail(webpage)
+        timestamp = int_or_none(self._og_search_property('updated_time', webpage, fatal=False))
+        width = int_or_none(self._og_search_property('video:width', webpage, fatal=False))
+        height = int_or_none(self._og_search_property('video:height', webpage, fatal=False))
+        duration = float_or_none(self._html_search_regex(
+            r'data-duration="([^"]+)"', webpage, 'duration', fatal=False))
+        view_count = str_to_int(self._html_search_regex(
+            r'<span class="video_views">\s*([\d,\.]+)\s*plays?', webpage, 'view count', fatal=False))
+        like_count = str_to_int(self._html_search_regex(
+            r'class="score js-video-vote-score"[^>]+data-score="([\d,\.\s]+)">',
+            webpage, 'like count', fatal=False))
+        comment_count = str_to_int(self._html_search_regex(
+            r'class="js-comment-count"[^>]+data-count="([\d,\.\s]+)">',
+            webpage, 'comment count', fatal=False))
+
+        return {
+            'id': video_id,
+            'url': video_url,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'timestamp': timestamp,
+            'width': width,
+            'height': height,
+            'duration': duration,
+            'view_count': view_count,
+            'like_count': like_count,
+            'comment_count': comment_count,
+        }
index a3c6e83b01194d37b683912d131c93664dbf0680..11c7d7e817f1f0839604311534dd918b5b5e4fee 100644 (file)
@@ -121,6 +121,21 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
                 'videopassword': 'youtube-dl',
             },
         },
+        {
+            'url': 'http://vimeo.com/channels/keypeele/75629013',
+            'md5': '2f86a05afe9d7abc0b9126d229bbe15d',
+            'note': 'Video is freely available via original URL '
+                    'and protected with password when accessed via http://vimeo.com/75629013',
+            'info_dict': {
+                'id': '75629013',
+                'ext': 'mp4',
+                'title': 'Key & Peele: Terrorist Interrogation',
+                'description': 'md5:8678b246399b070816b12313e8b4eb5c',
+                'uploader_id': 'atencio',
+                'uploader': 'Peter Atencio',
+                'duration': 187,
+            },
+        },
         {
             'url': 'http://vimeo.com/76979871',
             'md5': '3363dd6ffebe3784d56f4132317fd446',
@@ -196,8 +211,6 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
         video_id = mobj.group('id')
         if mobj.group('pro') or mobj.group('player'):
             url = 'http://player.vimeo.com/video/' + video_id
-        else:
-            url = 'https://vimeo.com/' + video_id
 
         # Retrieve video webpage to extract further information
         request = compat_urllib_request.Request(url, None, headers)
@@ -263,7 +276,7 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
         if video_thumbnail is None:
             video_thumbs = config["video"].get("thumbs")
             if video_thumbs and isinstance(video_thumbs, dict):
-                _, video_thumbnail = sorted((int(width), t_url) for (width, t_url) in video_thumbs.items())[-1]
+                _, video_thumbnail = sorted((int(width if width.isdigit() else 0), t_url) for (width, t_url) in video_thumbs.items())[-1]
 
         # Extract video description
         video_description = None
index f1b9e9a19d05d9026feb24b6f22d395cd3990e5f..2544c24bd16e4af7b3fba806dff1b87530759f82 100644 (file)
@@ -1,10 +1,12 @@
 from __future__ import unicode_literals
 
-import json
 import re
 
 from .common import InfoExtractor
-from ..utils import int_or_none
+from ..utils import (
+    int_or_none,
+    compat_str,
+)
 
 
 class VubeIE(InfoExtractor):
@@ -29,6 +31,7 @@ class VubeIE(InfoExtractor):
                 'like_count': int,
                 'dislike_count': int,
                 'comment_count': int,
+                'categories': ['pop', 'music', 'cover', 'singing', 'jessie j', 'price tag', 'chiara grispo'],
             }
         },
         {
@@ -47,6 +50,7 @@ class VubeIE(InfoExtractor):
                 'like_count': int,
                 'dislike_count': int,
                 'comment_count': int,
+                'categories': ['seraina', 'jessica', 'krewella', 'alive'],
             }
         }, {
             'url': 'http://vube.com/vote/Siren+Gene/0nmsMY5vEq?n=2&t=s',
@@ -56,13 +60,15 @@ class VubeIE(InfoExtractor):
                 'ext': 'mp4',
                 'title': 'Frozen - Let It Go Cover by Siren Gene',
                 'description': 'My rendition of "Let It Go" originally sung by Idina Menzel.',
-                'uploader': 'Siren Gene',
-                'uploader_id': 'Siren',
                 'thumbnail': 're:^http://frame\.thestaticvube\.com/snap/[0-9x]+/10283ab622a-86c9-4681-51f2-30d1f65774af\.jpg$',
+                'uploader': 'Siren',
+                'timestamp': 1395448018,
+                'upload_date': '20140322',
                 'duration': 221.788,
                 'like_count': int,
                 'dislike_count': int,
                 'comment_count': int,
+                'categories': ['let it go', 'cover', 'idina menzel', 'frozen', 'singing', 'disney', 'siren gene'],
             }
         }
     ]
@@ -71,47 +77,40 @@ class VubeIE(InfoExtractor):
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('id')
 
-        webpage = self._download_webpage(url, video_id)
-        data_json = self._search_regex(
-            r'(?s)window\["(?:tapiVideoData|vubeOriginalVideoData)"\]\s*=\s*(\{.*?\n});\n',
-            webpage, 'video data'
-        )
-        data = json.loads(data_json)
-        video = (
-            data.get('video') or
-            data)
-        assert isinstance(video, dict)
+        video = self._download_json(
+            'http://vube.com/t-api/v1/video/%s' % video_id, video_id, 'Downloading video JSON')
 
         public_id = video['public_id']
 
-        formats = [
-            {
-                'url': 'http://video.thestaticvube.com/video/%s/%s.mp4' % (fmt['media_resolution_id'], public_id),
-                'height': int(fmt['height']),
-                'abr': int(fmt['audio_bitrate']),
-                'vbr': int(fmt['video_bitrate']),
-                'format_id': fmt['media_resolution_id']
-            } for fmt in video['mtm'] if fmt['transcoding_status'] == 'processed'
-        ]
+        formats = []
+
+        for media in video['media'].get('video', []) + video['media'].get('audio', []):
+            if media['transcoding_status'] != 'processed':
+                continue
+            fmt = {
+                'url': 'http://video.thestaticvube.com/video/%s/%s.mp4' % (media['media_resolution_id'], public_id),
+                'abr': int(media['audio_bitrate']),
+                'format_id': compat_str(media['media_resolution_id']),
+            }
+            vbr = int(media['video_bitrate'])
+            if vbr:
+                fmt.update({
+                    'vbr': vbr,
+                    'height': int(media['height']),
+                })
+            formats.append(fmt)
 
         self._sort_formats(formats)
 
         title = video['title']
         description = video.get('description')
-        thumbnail = self._proto_relative_url(
-            video.get('thumbnail') or video.get('thumbnail_src'),
-            scheme='http:')
-        uploader = data.get('user', {}).get('channel', {}).get('name') or video.get('user_alias')
-        uploader_id = data.get('user', {}).get('name')
+        thumbnail = self._proto_relative_url(video.get('thumbnail_src'), scheme='http:')
+        uploader = video.get('user_alias') or video.get('channel')
         timestamp = int_or_none(video.get('upload_time'))
         duration = video['duration']
         view_count = video.get('raw_view_count')
-        like_count = video.get('rlikes')
-        if like_count is None:
-            like_count = video.get('total_likes')
-        dislike_count = video.get('rhates')
-        if dislike_count is None:
-            dislike_count = video.get('total_hates')
+        like_count = video.get('total_likes')
+        dislike_count = video.get('total_hates')
 
         comments = video.get('comments')
         comment_count = None
@@ -124,6 +123,8 @@ class VubeIE(InfoExtractor):
         else:
             comment_count = len(comments)
 
+        categories = [tag['text'] for tag in video['tags']]
+
         return {
             'id': video_id,
             'formats': formats,
@@ -131,11 +132,11 @@ class VubeIE(InfoExtractor):
             'description': description,
             'thumbnail': thumbnail,
             'uploader': uploader,
-            'uploader_id': uploader_id,
             'timestamp': timestamp,
             'duration': duration,
             'view_count': view_count,
             'like_count': like_count,
             'dislike_count': dislike_count,
             'comment_count': comment_count,
+            'categories': categories,
         }
diff --git a/youtube_dl/extractor/xboxclips.py b/youtube_dl/extractor/xboxclips.py
new file mode 100644 (file)
index 0000000..a9aa72e
--- /dev/null
@@ -0,0 +1,57 @@
+# encoding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    parse_iso8601,
+    float_or_none,
+    int_or_none,
+)
+
+
+class XboxClipsIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?xboxclips\.com/video\.php\?.*vid=(?P<id>[\w-]{36})'
+    _TEST = {
+        'url': 'https://xboxclips.com/video.php?uid=2533274823424419&gamertag=Iabdulelah&vid=074a69a9-5faf-46aa-b93b-9909c1720325',
+        'md5': 'fbe1ec805e920aeb8eced3c3e657df5d',
+        'info_dict': {
+            'id': '074a69a9-5faf-46aa-b93b-9909c1720325',
+            'ext': 'mp4',
+            'title': 'Iabdulelah playing Upload Studio',
+            'filesize_approx': 28101836.8,
+            'timestamp': 1407388500,
+            'upload_date': '20140807',
+            'duration': 56,
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        webpage = self._download_webpage(url, video_id)
+
+        video_url = self._html_search_regex(
+            r'>Link: <a href="([^"]+)">', webpage, 'video URL')
+        title = self._html_search_regex(
+            r'<title>XboxClips \| ([^<]+)</title>', webpage, 'title')
+        timestamp = parse_iso8601(self._html_search_regex(
+            r'>Recorded: ([^<]+)<', webpage, 'upload date', fatal=False))
+        filesize = float_or_none(self._html_search_regex(
+            r'>Size: ([\d\.]+)MB<', webpage, 'file size', fatal=False), invscale=1024 * 1024)
+        duration = int_or_none(self._html_search_regex(
+            r'>Duration: (\d+) Seconds<', webpage, 'duration', fatal=False))
+        view_count = int_or_none(self._html_search_regex(
+            r'>Views: (\d+)<', webpage, 'view count', fatal=False))
+
+        return {
+            'id': video_id,
+            'url': video_url,
+            'title': title,
+            'timestamp': timestamp,
+            'filesize_approx': filesize,
+            'duration': duration,
+            'view_count': view_count,
+        }
index d84be25620eecb944845b74299510067772c583f..0e3b33b1652fe1242b36cb79d131acb6694066da 100644 (file)
@@ -15,7 +15,7 @@ from ..utils import (
 
 class YahooIE(InfoExtractor):
     IE_DESC = 'Yahoo screen and movies'
-    _VALID_URL = r'https?://(?:screen|movies)\.yahoo\.com/.*?-(?P<id>[0-9]+)(?:-[a-z]+)?\.html'
+    _VALID_URL = r'(?P<url>https?://(?:screen|movies)\.yahoo\.com/.*?-(?P<id>[0-9]+)(?:-[a-z]+)?\.html)'
     _TESTS = [
         {
             'url': 'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html',
@@ -46,12 +46,23 @@ class YahooIE(InfoExtractor):
                 'title': 'The World Loves Spider-Man',
                 'description': '''People all over the world are celebrating the release of \"The Amazing Spider-Man 2.\" We're taking a look at the enthusiastic response Spider-Man has received from viewers all over the world.''',
             }
-        }
+        },
+        {
+            'url': 'https://screen.yahoo.com/community/community-sizzle-reel-203225340.html?format=embed',
+            'md5': '60e8ac193d8fb71997caa8fce54c6460',
+            'info_dict': {
+                'id': '4fe78544-8d48-39d8-97cd-13f205d9fcdb',
+                'ext': 'mp4',
+                'title': "Yahoo Saves 'Community'",
+                'description': 'md5:4d4145af2fd3de00cbb6c1d664105053',
+            }
+        },
     ]
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('id')
+        url = mobj.group('url')
         webpage = self._download_webpage(url, video_id)
 
         items_json = self._search_regex(
index 73a01107d722437ebb62d1a82ec72a33e49124cf..225e2b7f4681e8cce471a8a80af0f64eb14e071e 100644 (file)
@@ -225,7 +225,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
         '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
 
         # Dash webm audio
-        '171': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 48, 'preference': -50},
+        '171': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 128, 'preference': -50},
         '172': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 256, 'preference': -50},
 
         # RTMP (unnamed)
@@ -344,7 +344,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
         """Indicate the download will use the RTMP protocol."""
         self.to_screen(u'RTMP download detected')
 
-    def _extract_signature_function(self, video_id, player_url, slen):
+    def _signature_cache_id(self, example_sig):
+        """ Return a string representation of a signature """
+        return u'.'.join(compat_str(len(part)) for part in example_sig.split('.'))
+
+    def _extract_signature_function(self, video_id, player_url, example_sig):
         id_m = re.match(
             r'.*-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player)?\.(?P<ext>[a-z]+)$',
             player_url)
@@ -354,7 +358,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
         player_id = id_m.group('id')
 
         # Read from filesystem cache
-        func_id = '%s_%s_%d' % (player_type, player_id, slen)
+        func_id = '%s_%s_%s' % (
+            player_type, player_id, self._signature_cache_id(example_sig))
         assert os.path.basename(func_id) == func_id
         cache_dir = get_cachedir(self._downloader.params)
 
@@ -369,6 +374,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                 return lambda s: u''.join(s[i] for i in cache_spec)
             except IOError:
                 pass  # No cache available
+            except ValueError:
+                try:
+                    file_size = os.path.getsize(cache_fn)
+                except (OSError, IOError) as oe:
+                    file_size = str(oe)
+                self._downloader.report_warning(
+                    u'Cache %s failed (%s)' % (cache_fn, file_size))
 
         if player_type == 'js':
             code = self._download_webpage(
@@ -388,7 +400,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
 
         if cache_enabled:
             try:
-                test_string = u''.join(map(compat_chr, range(slen)))
+                test_string = u''.join(map(compat_chr, range(len(example_sig))))
                 cache_res = res(test_string)
                 cache_spec = [ord(c) for c in cache_res]
                 try:
@@ -404,7 +416,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
 
         return res
 
-    def _print_sig_code(self, func, slen):
+    def _print_sig_code(self, func, example_sig):
         def gen_sig_code(idxs):
             def _genslice(start, end, step):
                 starts = u'' if start == 0 else str(start)
@@ -433,11 +445,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
             else:
                 yield _genslice(start, i, step)
 
-        test_string = u''.join(map(compat_chr, range(slen)))
+        test_string = u''.join(map(compat_chr, range(len(example_sig))))
         cache_res = func(test_string)
         cache_spec = [ord(c) for c in cache_res]
         expr_code = u' + '.join(gen_sig_code(cache_spec))
-        code = u'if len(s) == %d:\n    return %s\n' % (slen, expr_code)
+        signature_id_tuple = '(%s)' % (
+            ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
+        code = (u'if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
+                u'    return %s\n') % (signature_id_tuple, expr_code)
         self.to_screen(u'Extracted signature function:\n' + code)
 
     def _parse_sig_js(self, jscode):
@@ -465,20 +480,20 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
         if player_url.startswith(u'//'):
             player_url = u'https:' + player_url
         try:
-            player_id = (player_url, len(s))
+            player_id = (player_url, self._signature_cache_id(s))
             if player_id not in self._player_cache:
                 func = self._extract_signature_function(
-                    video_id, player_url, len(s)
+                    video_id, player_url, s
                 )
                 self._player_cache[player_id] = func
             func = self._player_cache[player_id]
             if self._downloader.params.get('youtube_print_sig_code'):
-                self._print_sig_code(func, len(s))
+                self._print_sig_code(func, s)
             return func(s)
         except Exception as e:
             tb = traceback.format_exc()
             raise ExtractorError(
-                u'Automatic signature extraction failed: ' + tb, cause=e)
+                u'Signature extraction failed: ' + tb, cause=e)
 
     def _get_available_subtitles(self, video_id, webpage):
         try:
@@ -613,7 +628,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
             data = compat_urllib_parse.urlencode({
                 'video_id': video_id,
                 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
-                'sts':'16268',
+                'sts': self._search_regex(
+                    r'"sts"\s*:\s*(\d+)', video_webpage, 'sts'),
             })
             video_info_url = proto + '://www.youtube.com/get_video_info?' + data
             video_info_webpage = self._download_webpage(video_info_url, video_id,
@@ -807,50 +823,54 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
             url_map = {}
             for url_data_str in encoded_url_map.split(','):
                 url_data = compat_parse_qs(url_data_str)
-                if 'itag' in url_data and 'url' in url_data:
-                    url = url_data['url'][0]
-                    if 'sig' in url_data:
-                        url += '&signature=' + url_data['sig'][0]
-                    elif 's' in url_data:
-                        encrypted_sig = url_data['s'][0]
-
-                        if not age_gate:
-                            jsplayer_url_json = self._search_regex(
-                                r'"assets":.+?"js":\s*("[^"]+")',
-                                video_webpage, u'JS player URL')
-                            player_url = json.loads(jsplayer_url_json)
+                if 'itag' not in url_data or 'url' not in url_data:
+                    continue
+                format_id = url_data['itag'][0]
+                url = url_data['url'][0]
+
+                if 'sig' in url_data:
+                    url += '&signature=' + url_data['sig'][0]
+                elif 's' in url_data:
+                    encrypted_sig = url_data['s'][0]
+
+                    if not age_gate:
+                        jsplayer_url_json = self._search_regex(
+                            r'"assets":.+?"js":\s*("[^"]+")',
+                            video_webpage, u'JS player URL')
+                        player_url = json.loads(jsplayer_url_json)
+                    if player_url is None:
+                        player_url_json = self._search_regex(
+                            r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
+                            video_webpage, u'age gate player URL')
+                        player_url = json.loads(player_url_json)
+
+                    if self._downloader.params.get('verbose'):
                         if player_url is None:
-                            player_url_json = self._search_regex(
-                                r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
-                                video_webpage, u'age gate player URL')
-                            player_url = json.loads(player_url_json)
-
-                        if self._downloader.params.get('verbose'):
-                            if player_url is None:
-                                player_version = 'unknown'
-                                player_desc = 'unknown'
+                            player_version = 'unknown'
+                            player_desc = 'unknown'
+                        else:
+                            if player_url.endswith('swf'):
+                                player_version = self._search_regex(
+                                    r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
+                                    u'flash player', fatal=False)
+                                player_desc = 'flash player %s' % player_version
                             else:
-                                if player_url.endswith('swf'):
-                                    player_version = self._search_regex(
-                                        r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
-                                        u'flash player', fatal=False)
-                                    player_desc = 'flash player %s' % player_version
-                                else:
-                                    player_version = self._search_regex(
-                                        r'html5player-(.+?)\.js', video_webpage,
-                                        'html5 player', fatal=False)
-                                    player_desc = u'html5 player %s' % player_version
-
-                            parts_sizes = u'.'.join(compat_str(len(part)) for part in encrypted_sig.split('.'))
-                            self.to_screen(u'encrypted signature length %d (%s), itag %s, %s' %
-                                (len(encrypted_sig), parts_sizes, url_data['itag'][0], player_desc))
-
-                        signature = self._decrypt_signature(
-                            encrypted_sig, video_id, player_url, age_gate)
-                        url += '&signature=' + signature
-                    if 'ratebypass' not in url:
-                        url += '&ratebypass=yes'
-                    url_map[url_data['itag'][0]] = url
+                                player_version = self._search_regex(
+                                    r'html5player-([^/]+?)(?:/html5player)?\.js',
+                                    player_url,
+                                    'html5 player', fatal=False)
+                                player_desc = u'html5 player %s' % player_version
+
+                        parts_sizes = self._signature_cache_id(encrypted_sig)
+                        self.to_screen(u'{%s} signature length %s, %s' %
+                            (format_id, parts_sizes, player_desc))
+
+                    signature = self._decrypt_signature(
+                        encrypted_sig, video_id, player_url, age_gate)
+                    url += '&signature=' + signature
+                if 'ratebypass' not in url:
+                    url += '&ratebypass=yes'
+                url_map[format_id] = url
             formats = _map_to_format_list(url_map)
         elif video_info.get('hlsvp'):
             manifest_url = video_info['hlsvp'][0]
index 13ad5ba1a32f0a57e1f49a078a50dbdca698342e..c40cd376d120f2063bb4cf6958ca4cf701db1f00 100644 (file)
@@ -1,5 +1,6 @@
 from __future__ import unicode_literals
 
+import json
 import re
 
 from .utils import (
@@ -40,8 +41,9 @@ class JSInterpreter(object):
             assign = lambda v: v
             expr = stmt[len('return '):]
         else:
-            raise ExtractorError(
-                'Cannot determine left side of statement in %r' % stmt)
+            # Try interpreting it as an expression
+            expr = stmt
+            assign = lambda v: v
 
         v = self.interpret_expression(expr, local_vars, allow_recursion)
         return assign(v)
@@ -53,35 +55,63 @@ class JSInterpreter(object):
         if expr.isalpha():
             return local_vars[expr]
 
-        m = re.match(r'^(?P<in>[a-z]+)\.(?P<member>.*)$', expr)
+        try:
+            return json.loads(expr)
+        except ValueError:
+            pass
+
+        m = re.match(
+            r'^(?P<var>[a-zA-Z0-9_]+)\.(?P<member>[^(]+)(?:\(+(?P<args>[^()]*)\))?$',
+            expr)
         if m:
+            variable = m.group('var')
             member = m.group('member')
-            variable = m.group('in')
+            arg_str = m.group('args')
 
-            if variable not in local_vars:
+            if variable in local_vars:
+                obj = local_vars[variable]
+            else:
                 if variable not in self._objects:
                     self._objects[variable] = self.extract_object(variable)
                 obj = self._objects[variable]
-                key, args = member.split('(', 1)
-                args = args.strip(')')
-                argvals = [int(v) if v.isdigit() else local_vars[v]
-                           for v in args.split(',')]
-                return obj[key](argvals)
-
-            val = local_vars[variable]
-            if member == 'split("")':
-                return list(val)
-            if member == 'join("")':
-                return ''.join(val)
-            if member == 'length':
-                return len(val)
-            if member == 'reverse()':
-                return val[::-1]
-            slice_m = re.match(r'slice\((?P<idx>.*)\)', member)
-            if slice_m:
-                idx = self.interpret_expression(
-                    slice_m.group('idx'), local_vars, allow_recursion - 1)
-                return val[idx:]
+
+            if arg_str is None:
+                # Member access
+                if member == 'length':
+                    return len(obj)
+                return obj[member]
+
+            assert expr.endswith(')')
+            # Function call
+            if arg_str == '':
+                argvals = tuple()
+            else:
+                argvals = tuple([
+                    self.interpret_expression(v, local_vars, allow_recursion)
+                    for v in arg_str.split(',')])
+
+            if member == 'split':
+                assert argvals == ('',)
+                return list(obj)
+            if member == 'join':
+                assert len(argvals) == 1
+                return argvals[0].join(obj)
+            if member == 'reverse':
+                assert len(argvals) == 0
+                obj.reverse()
+                return obj
+            if member == 'slice':
+                assert len(argvals) == 1
+                return obj[argvals[0]:]
+            if member == 'splice':
+                assert isinstance(obj, list)
+                index, howMany = argvals
+                res = []
+                for i in range(index, min(index + howMany, len(obj))):
+                    res.append(obj.pop(index))
+                return res
+
+            return obj[member](argvals)
 
         m = re.match(
             r'^(?P<in>[a-z]+)\[(?P<idx>.+)\]$', expr)
@@ -103,10 +133,11 @@ class JSInterpreter(object):
             r'^(?P<func>[a-zA-Z$]+)\((?P<args>[a-z0-9,]+)\)$', expr)
         if m:
             fname = m.group('func')
+            argvals = tuple([
+                int(v) if v.isdigit() else local_vars[v]
+                for v in m.group('args').split(',')])
             if fname not in self._functions:
                 self._functions[fname] = self.extract_function(fname)
-            argvals = [int(v) if v.isdigit() else local_vars[v]
-                       for v in m.group('args').split(',')]
             return self._functions[fname](argvals)
         raise ExtractorError('Unsupported JS expression %r' % expr)
 
index 3ecd798d74217011920135199ee59c76d58852c0..42ad520f9cac27581aa4edbc3606a702f3376b82 100644 (file)
@@ -24,6 +24,7 @@ import socket
 import struct
 import subprocess
 import sys
+import tempfile
 import traceback
 import xml.etree.ElementTree
 import zlib
@@ -228,22 +229,46 @@ else:
         assert type(s) == type(u'')
         print(s)
 
-# In Python 2.x, json.dump expects a bytestream.
-# In Python 3.x, it writes to a character stream
-if sys.version_info < (3,0):
-    def write_json_file(obj, fn):
-        with open(fn, 'wb') as f:
-            json.dump(obj, f)
-else:
-    def write_json_file(obj, fn):
-        with open(fn, 'w', encoding='utf-8') as f:
-            json.dump(obj, f)
 
-if sys.version_info >= (2,7):
+def write_json_file(obj, fn):
+    """ Encode obj as JSON and write it to fn, atomically """
+
+    args = {
+        'suffix': '.tmp',
+        'prefix': os.path.basename(fn) + '.',
+        'dir': os.path.dirname(fn),
+        'delete': False,
+    }
+
+    # In Python 2.x, json.dump expects a bytestream.
+    # In Python 3.x, it writes to a character stream
+    if sys.version_info < (3, 0):
+        args['mode'] = 'wb'
+    else:
+        args.update({
+            'mode': 'w',
+            'encoding': 'utf-8',
+        })
+
+    tf = tempfile.NamedTemporaryFile(**args)
+
+    try:
+        with tf:
+            json.dump(obj, tf)
+        os.rename(tf.name, fn)
+    except:
+        try:
+            os.remove(tf.name)
+        except OSError:
+            pass
+        raise
+
+
+if sys.version_info >= (2, 7):
     def find_xpath_attr(node, xpath, key, val):
         """ Find the xpath xpath[@key=val] """
-        assert re.match(r'^[a-zA-Z]+$', key)
-        assert re.match(r'^[a-zA-Z0-9@\s:._]*$', val)
+        assert re.match(r'^[a-zA-Z-]+$', key)
+        assert re.match(r'^[a-zA-Z0-9@\s:._-]*$', val)
         expr = xpath + u"[@%s='%s']" % (key, val)
         return node.find(expr)
 else:
@@ -827,6 +852,7 @@ def unified_strdate(date_str):
         '%b %dnd %Y %I:%M%p',
         '%b %dth %Y %I:%M%p',
         '%Y-%m-%d',
+        '%Y/%m/%d',
         '%d.%m.%Y',
         '%d/%m/%Y',
         '%Y/%m/%d %H:%M:%S',
@@ -852,6 +878,8 @@ def unified_strdate(date_str):
     return upload_date
 
 def determine_ext(url, default_ext=u'unknown_video'):
+    if url is None:
+        return default_ext
     guess = url.partition(u'?')[0].rpartition(u'.')[2]
     if re.match(r'^[A-Za-z0-9]+$', guess):
         return guess
@@ -1271,9 +1299,15 @@ def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
     if get_attr:
         if v is not None:
             v = getattr(v, get_attr, None)
+    if v == '':
+        v = None
     return default if v is None else (int(v) * invscale // scale)
 
 
+def str_or_none(v, default=None):
+    return default if v is None else compat_str(v)
+
+
 def str_to_int(int_str):
     if int_str is None:
         return None
@@ -1440,6 +1474,34 @@ def strip_jsonp(code):
     return re.sub(r'(?s)^[a-zA-Z0-9_]+\s*\(\s*(.*)\);?\s*?\s*$', r'\1', code)
 
 
+def js_to_json(code):
+    def fix_kv(m):
+        key = m.group(2)
+        if key.startswith("'"):
+            assert key.endswith("'")
+            assert '"' not in key
+            key = '"%s"' % key[1:-1]
+        elif not key.startswith('"'):
+            key = '"%s"' % key
+
+        value = m.group(4)
+        if value.startswith("'"):
+            assert value.endswith("'")
+            assert '"' not in value
+            value = '"%s"' % value[1:-1]
+
+        return m.group(1) + key + m.group(3) + value
+
+    res = re.sub(r'''(?x)
+            ([{,]\s*)
+            ("[^"]*"|\'[^\']*\'|[a-z0-9A-Z]+)
+            (:\s*)
+            ([0-9.]+|true|false|"[^"]*"|\'[^\']*\'|\[|\{)
+        ''', fix_kv, code)
+    res = re.sub(r',(\s*\])', lambda m: m.group(1), res)
+    return res
+
+
 def qualities(quality_ids):
     """ Get a numeric quality value out of a list of possible values """
     def q(qid):
index dca400d5e3bb04332ae0118d73a2c045ed92b6ef..15b9d6c61c6b3a87b9a4c89f7363b6787d4b18ef 100644 (file)
@@ -1,2 +1,2 @@
 
-__version__ = '2014.07.23.2'
+__version__ = '2014.08.21.3'