Merge pull request #4831 from light94/master
author Philipp Hagemeister <phihag@phihag.de>
Mon, 2 Feb 2015 11:03:28 +0000 (12:03 +0100)
committer Philipp Hagemeister <phihag@phihag.de>
Mon, 2 Feb 2015 11:03:28 +0000 (12:03 +0100)
Handling Connection Reset by Peer Error
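
The branch merged here addresses "Connection reset by peer" ([Errno 104], ECONNRESET) failures that can abort an HTTP download mid-transfer. For illustration only, a minimal sketch of the usual catch-and-retry pattern in Python, using a hypothetical fetch_with_retry helper rather than the actual code from this pull request:

    import errno
    import socket
    import time
    import urllib.request


    def fetch_with_retry(url, max_retries=3, backoff=1.0):
        """Retry a request when the peer resets the connection (ECONNRESET)."""
        for attempt in range(1, max_retries + 1):
            try:
                with urllib.request.urlopen(url) as resp:
                    return resp.read()
            except socket.error as err:
                # Re-raise anything that is not ECONNRESET, or once retries are exhausted.
                if err.errno != errno.ECONNRESET or attempt == max_retries:
                    raise
                time.sleep(backoff * attempt)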

32 files changed:
README.md
test/test_download.py
test/test_jsinterp.py [new file with mode: 0644]
youtube_dl/YoutubeDL.py
youtube_dl/compat.py
youtube_dl/downloader/hls.py
youtube_dl/extractor/ard.py
youtube_dl/extractor/defense.py
youtube_dl/extractor/francetv.py
youtube_dl/extractor/globo.py
youtube_dl/extractor/kankan.py
youtube_dl/extractor/keezmovies.py
youtube_dl/extractor/la7.py
youtube_dl/extractor/macgamestore.py
youtube_dl/extractor/mpora.py
youtube_dl/extractor/mtv.py
youtube_dl/extractor/nerdcubed.py
youtube_dl/extractor/ringtv.py
youtube_dl/extractor/rottentomatoes.py
youtube_dl/extractor/rutv.py
youtube_dl/extractor/servingsys.py
youtube_dl/extractor/sina.py
youtube_dl/extractor/soundcloud.py
youtube_dl/extractor/teamcoco.py
youtube_dl/extractor/teletask.py
youtube_dl/extractor/toutv.py
youtube_dl/extractor/vimeo.py
youtube_dl/extractor/wdr.py
youtube_dl/extractor/youtube.py
youtube_dl/jsinterp.py
youtube_dl/utils.py
youtube_dl/version.py

index 09da0129ed5c629db67e85bbb81f40fd85cf0be0..68b41970a76b7e3352f6fb9847dc7aed6f5a7f17 100644 (file)
--- a/README.md
+++ b/README.md
@@ -534,7 +534,7 @@ Either prepend `http://www.youtube.com/watch?v=` or separate the ID from the opt
 
 ### How can I detect whether a given URL is supported by youtube-dl?
 
-For one, have a look at the [list of supported sites](docs/supportedsites). Note that it can sometimes happen that the site changes its URL scheme (say, from http://example.com/video/1234567 to http://example.com/v/1234567 ) and youtube-dl reports an URL of a service in that list as unsupported. In that case, simply report a bug.
+For one, have a look at the [list of supported sites](docs/supportedsites.md). Note that it can sometimes happen that the site changes its URL scheme (say, from http://example.com/video/1234567 to http://example.com/v/1234567 ) and youtube-dl reports an URL of a service in that list as unsupported. In that case, simply report a bug.
 
 It is *not* possible to detect whether a URL is supported or not. That's because youtube-dl contains a generic extractor which matches **all** URLs. You may be tempted to disable, exclude, or remove the generic extractor, but the generic extractor not only allows users to extract videos from lots of websites that embed a video from another service, but may also be used to extract video from a service that it's hosting itself. Therefore, we neither recommend nor support disabling, excluding, or removing the generic extractor.
 
index 412f3dbce8683766ba53061fb2aecee95339b829..6a149ae4f707e1dc048890b72a4903ccb8a5f785 100644 (file)
--- a/test/test_download.py
+++ b/test/test_download.py
@@ -89,7 +89,7 @@ def generator(test_case):
 
         for tc in test_cases:
             info_dict = tc.get('info_dict', {})
-            if not tc.get('file') and not (info_dict.get('id') and info_dict.get('ext')):
+            if not (info_dict.get('id') and info_dict.get('ext')):
                 raise Exception('Test definition incorrect. The output file cannot be known. Are both \'id\' and \'ext\' keys present?')
 
         if 'skip' in test_case:
@@ -116,7 +116,7 @@ def generator(test_case):
         expect_warnings(ydl, test_case.get('expected_warnings', []))
 
         def get_tc_filename(tc):
-            return tc.get('file') or ydl.prepare_filename(tc.get('info_dict', {}))
+            return ydl.prepare_filename(tc.get('info_dict', {}))
 
         res_dict = None
 
diff --git a/test/test_jsinterp.py b/test/test_jsinterp.py
new file mode 100644 (file)
index 0000000..b91b8c4
--- /dev/null
+++ b/test/test_jsinterp.py
@@ -0,0 +1,95 @@
+#!/usr/bin/env python
+
+from __future__ import unicode_literals
+
+# Allow direct execution
+import os
+import sys
+import unittest
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from youtube_dl.jsinterp import JSInterpreter
+
+
+class TestJSInterpreter(unittest.TestCase):
+    def test_basic(self):
+        jsi = JSInterpreter('function x(){;}')
+        self.assertEqual(jsi.call_function('x'), None)
+
+        jsi = JSInterpreter('function x3(){return 42;}')
+        self.assertEqual(jsi.call_function('x3'), 42)
+
+    def test_calc(self):
+        jsi = JSInterpreter('function x4(a){return 2*a+1;}')
+        self.assertEqual(jsi.call_function('x4', 3), 7)
+
+    def test_empty_return(self):
+        jsi = JSInterpreter('function f(){return; y()}')
+        self.assertEqual(jsi.call_function('f'), None)
+
+    def test_morespace(self):
+        jsi = JSInterpreter('function x (a) { return 2 * a + 1 ; }')
+        self.assertEqual(jsi.call_function('x', 3), 7)
+
+        jsi = JSInterpreter('function f () { x =  2  ; return x; }')
+        self.assertEqual(jsi.call_function('f'), 2)
+
+    def test_strange_chars(self):
+        jsi = JSInterpreter('function $_xY1 ($_axY1) { var $_axY2 = $_axY1 + 1; return $_axY2; }')
+        self.assertEqual(jsi.call_function('$_xY1', 20), 21)
+
+    def test_operators(self):
+        jsi = JSInterpreter('function f(){return 1 << 5;}')
+        self.assertEqual(jsi.call_function('f'), 32)
+
+        jsi = JSInterpreter('function f(){return 19 & 21;}')
+        self.assertEqual(jsi.call_function('f'), 17)
+
+        jsi = JSInterpreter('function f(){return 11 >> 2;}')
+        self.assertEqual(jsi.call_function('f'), 2)
+
+    def test_array_access(self):
+        jsi = JSInterpreter('function f(){var x = [1,2,3]; x[0] = 4; x[0] = 5; x[2] = 7; return x;}')
+        self.assertEqual(jsi.call_function('f'), [5, 2, 7])
+
+    def test_parens(self):
+        jsi = JSInterpreter('function f(){return (1) + (2) * ((( (( (((((3)))))) )) ));}')
+        self.assertEqual(jsi.call_function('f'), 7)
+
+        jsi = JSInterpreter('function f(){return (1 + 2) * 3;}')
+        self.assertEqual(jsi.call_function('f'), 9)
+
+    def test_assignments(self):
+        jsi = JSInterpreter('function f(){var x = 20; x = 30 + 1; return x;}')
+        self.assertEqual(jsi.call_function('f'), 31)
+
+        jsi = JSInterpreter('function f(){var x = 20; x += 30 + 1; return x;}')
+        self.assertEqual(jsi.call_function('f'), 51)
+
+        jsi = JSInterpreter('function f(){var x = 20; x -= 30 + 1; return x;}')
+        self.assertEqual(jsi.call_function('f'), -11)
+
+    def test_comments(self):
+        jsi = JSInterpreter('''
+        function x() {
+            var x = /* 1 + */ 2;
+            var y = /* 30
+            * 40 */ 50;
+            return x + y;
+        }
+        ''')
+        self.assertEqual(jsi.call_function('x'), 52)
+
+    def test_precedence(self):
+        jsi = JSInterpreter('''
+        function x() {
+            var a = [10, 20, 30, 40, 50];
+            var b = 6;
+            a[0]=a[b%a.length];
+            return a;
+        }''')
+        self.assertEqual(jsi.call_function('x'), [20, 20, 30, 40, 50])
+
+
+if __name__ == '__main__':
+    unittest.main()
index e5a96cad52ab9fea84729f0657c6cf21e739b1bb..c18ce9660711c7845b36e34285042d54a13f5072 100755 (executable)
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -25,6 +25,7 @@ if os.name == 'nt':
     import ctypes
 
 from .compat import (
+    compat_basestring,
     compat_cookiejar,
     compat_expanduser,
     compat_http_client,
@@ -1558,7 +1559,7 @@ class YoutubeDL(object):
         # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
         # To work around aforementioned issue we will replace request's original URL with
         # percent-encoded one
-        req_is_string = isinstance(req, basestring if sys.version_info < (3, 0) else compat_str)
+        req_is_string = isinstance(req, compat_basestring)
         url = req if req_is_string else req.get_full_url()
         url_escaped = escape_url(url)
 
index 497ca52de14e59aabbc5587a0c6238e02dd37a48..e989cdbbd180abf4543726e86d088cd45225bfca 100644 (file)
--- a/youtube_dl/compat.py
+++ b/youtube_dl/compat.py
@@ -114,6 +114,26 @@ except ImportError:
             string += pct_sequence.decode(encoding, errors)
         return string
 
+try:
+    compat_str = unicode  # Python 2
+except NameError:
+    compat_str = str
+
+try:
+    compat_basestring = basestring  # Python 2
+except NameError:
+    compat_basestring = str
+
+try:
+    compat_chr = unichr  # Python 2
+except NameError:
+    compat_chr = chr
+
+try:
+    from xml.etree.ElementTree import ParseError as compat_xml_parse_error
+except ImportError:  # Python 2.6
+    from xml.parsers.expat import ExpatError as compat_xml_parse_error
+
 
 try:
     from urllib.parse import parse_qs as compat_parse_qs
@@ -123,7 +143,7 @@ except ImportError:  # Python 2
 
     def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
                    encoding='utf-8', errors='replace'):
-        qs, _coerce_result = qs, unicode
+        qs, _coerce_result = qs, compat_str
         pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
         r = []
         for name_value in pairs:
@@ -162,21 +182,6 @@ except ImportError:  # Python 2
                 parsed_result[name] = [value]
         return parsed_result
 
-try:
-    compat_str = unicode  # Python 2
-except NameError:
-    compat_str = str
-
-try:
-    compat_chr = unichr  # Python 2
-except NameError:
-    compat_chr = chr
-
-try:
-    from xml.etree.ElementTree import ParseError as compat_xml_parse_error
-except ImportError:  # Python 2.6
-    from xml.parsers.expat import ExpatError as compat_xml_parse_error
-
 try:
     from shlex import quote as shlex_quote
 except ImportError:  # Python < 3.3
@@ -362,6 +367,7 @@ def workaround_optparse_bug9161():
 
 __all__ = [
     'compat_HTTPError',
+    'compat_basestring',
     'compat_chr',
     'compat_cookiejar',
     'compat_expanduser',
index aa58b52abb5998ba8879e6eba3a1d974484467e2..e527ee425365a096b50f541b1c75c82dcb9013fb 100644 (file)
--- a/youtube_dl/downloader/hls.py
+++ b/youtube_dl/downloader/hls.py
@@ -11,6 +11,7 @@ from ..compat import (
     compat_urllib_request,
 )
 from ..utils import (
+    encodeArgument,
     encodeFilename,
 )
 
@@ -21,23 +22,22 @@ class HlsFD(FileDownloader):
         self.report_destination(filename)
         tmpfilename = self.temp_name(filename)
 
-        args = [
-            '-y', '-i', url, '-f', 'mp4', '-c', 'copy',
-            '-bsf:a', 'aac_adtstoasc',
-            encodeFilename(tmpfilename, for_subprocess=True)]
-
         ffpp = FFmpegPostProcessor(downloader=self)
         program = ffpp._executable
         if program is None:
             self.report_error('m3u8 download detected but ffmpeg or avconv could not be found. Please install one.')
             return False
         ffpp.check_version()
-        cmd = [program] + args
 
-        retval = subprocess.call(cmd)
+        args = [
+            encodeArgument(opt)
+            for opt in (program, '-y', '-i', url, '-f', 'mp4', '-c', 'copy', '-bsf:a', 'aac_adtstoasc')]
+        args.append(encodeFilename(tmpfilename, True))
+
+        retval = subprocess.call(args)
         if retval == 0:
             fsize = os.path.getsize(encodeFilename(tmpfilename))
-            self.to_screen('\r[%s] %s bytes' % (cmd[0], fsize))
+            self.to_screen('\r[%s] %s bytes' % (args[0], fsize))
             self.try_rename(tmpfilename, filename)
             self._hook_progress({
                 'downloaded_bytes': fsize,
index 967bd865c53229e7ff38997ea9a7f4a6ab19f92d..783b53e23035a7bd3f3feac628ff2de8daefbea5 100644 (file)
--- a/youtube_dl/extractor/ard.py
+++ b/youtube_dl/extractor/ard.py
@@ -23,13 +23,7 @@ class ARDMediathekIE(InfoExtractor):
 
     _TESTS = [{
         'url': 'http://mediathek.daserste.de/sendungen_a-z/328454_anne-will/22429276_vertrauen-ist-gut-spionieren-ist-besser-geht',
-        'file': '22429276.mp4',
-        'md5': '469751912f1de0816a9fc9df8336476c',
-        'info_dict': {
-            'title': 'Vertrauen ist gut, Spionieren ist besser - Geht so deutsch-amerikanische Freundschaft?',
-            'description': 'Das Erste Mediathek [ARD]: Vertrauen ist gut, Spionieren ist besser - Geht so deutsch-amerikanische Freundschaft?, Anne Will, Über die Spionage-Affäre diskutieren Clemens Binninger, Katrin Göring-Eckardt, Georg Mascolo, Andrew B. Denison und Constanze Kurz.. Das Video zur Sendung Anne Will am Mittwoch, 16.07.2014',
-        },
-        'skip': 'Blocked outside of Germany',
+        'only_matching': True,
     }, {
         'url': 'http://www.ardmediathek.de/tv/Tatort/Das-Wunder-von-Wolbeck-Video-tgl-ab-20/Das-Erste/Video?documentId=22490580&bcastId=602916',
         'info_dict': {
index 5e50c63d9aca7d2642239ccf32a5cedd91b05174..2b90bf4fc2fcba04fe7e164602196586713d4225 100644 (file)
--- a/youtube_dl/extractor/defense.py
+++ b/youtube_dl/extractor/defense.py
@@ -1,40 +1,38 @@
 from __future__ import unicode_literals
 
-import re
-import json
-
 from .common import InfoExtractor
 
 
 class DefenseGouvFrIE(InfoExtractor):
     IE_NAME = 'defense.gouv.fr'
-    _VALID_URL = (r'http://.*?\.defense\.gouv\.fr/layout/set/'
-                  r'ligthboxvideo/base-de-medias/webtv/(.*)')
+    _VALID_URL = r'http://.*?\.defense\.gouv\.fr/layout/set/ligthboxvideo/base-de-medias/webtv/(?P<id>[^/?#]*)'
 
     _TEST = {
         'url': 'http://www.defense.gouv.fr/layout/set/ligthboxvideo/base-de-medias/webtv/attaque-chimique-syrienne-du-21-aout-2013-1',
-        'file': '11213.mp4',
         'md5': '75bba6124da7e63d2d60b5244ec9430c',
-        "info_dict": {
-            "title": "attaque-chimique-syrienne-du-21-aout-2013-1"
+        'info_dict': {
+            'id': '11213',
+            'ext': 'mp4',
+            'title': 'attaque-chimique-syrienne-du-21-aout-2013-1'
         }
     }
 
     def _real_extract(self, url):
-        title = re.match(self._VALID_URL, url).group(1)
+        title = self._match_id(url)
         webpage = self._download_webpage(url, title)
+
         video_id = self._search_regex(
             r"flashvars.pvg_id=\"(\d+)\";",
             webpage, 'ID')
 
         json_url = ('http://static.videos.gouv.fr/brightcovehub/export/json/'
                     + video_id)
-        info = self._download_webpage(json_url, title,
-                                      'Downloading JSON config')
-        video_url = json.loads(info)['renditions'][0]['url']
-
-        return {'id': video_id,
-                'ext': 'mp4',
-                'url': video_url,
-                'title': title,
-                }
+        info = self._download_json(json_url, title, 'Downloading JSON config')
+        video_url = info['renditions'][0]['url']
+
+        return {
+            'id': video_id,
+            'ext': 'mp4',
+            'url': video_url,
+            'title': title,
+        }
index bbc760a4990cac1b6cdb731c161d61c853a72729..170d6807529ac9b121187786cf9329b3b3525dc3 100644 (file)
--- a/youtube_dl/extractor/francetv.py
+++ b/youtube_dl/extractor/francetv.py
@@ -230,12 +230,13 @@ class FranceTVIE(FranceTVBaseInfoExtractor):
 
 class GenerationQuoiIE(InfoExtractor):
     IE_NAME = 'france2.fr:generation-quoi'
-    _VALID_URL = r'https?://generation-quoi\.france2\.fr/portrait/(?P<name>.*)(\?|$)'
+    _VALID_URL = r'https?://generation-quoi\.france2\.fr/portrait/(?P<id>[^/?#]+)'
 
     _TEST = {
         'url': 'http://generation-quoi.france2.fr/portrait/garde-a-vous',
-        'file': 'k7FJX8VBcvvLmX4wA5Q.mp4',
         'info_dict': {
+            'id': 'k7FJX8VBcvvLmX4wA5Q',
+            'ext': 'mp4',
             'title': 'Génération Quoi - Garde à Vous',
             'uploader': 'Génération Quoi',
         },
@@ -243,14 +244,12 @@ class GenerationQuoiIE(InfoExtractor):
             # It uses Dailymotion
             'skip_download': True,
         },
-        'skip': 'Only available from France',
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        name = mobj.group('name')
-        info_url = compat_urlparse.urljoin(url, '/medias/video/%s.json' % name)
-        info_json = self._download_webpage(info_url, name)
+        display_id = self._match_id(url)
+        info_url = compat_urlparse.urljoin(url, '/medias/video/%s.json' % display_id)
+        info_json = self._download_webpage(info_url, display_id)
         info = json.loads(info_json)
         return self.url_result('http://www.dailymotion.com/video/%s' % info['id'],
                                ie='Dailymotion')
index 6949a57c70dd9b378c4879dad8afd4f3b18e558a..29638a1948ff1230403f313f1c7725ab69224434 100644 (file)
--- a/youtube_dl/extractor/globo.py
+++ b/youtube_dl/extractor/globo.py
@@ -70,6 +70,19 @@ class GloboIE(InfoExtractor):
                 'like_count': int,
             }
         },
+        {
+            'url': 'http://globotv.globo.com/canal-brasil/sangue-latino/t/todos-os-videos/v/ator-e-diretor-argentino-ricado-darin-fala-sobre-utopias-e-suas-perdas/3928201/',
+            'md5': 'c1defca721ce25b2354e927d3e4b3dec',
+            'info_dict': {
+                'id': '3928201',
+                'ext': 'mp4',
+                'title': 'Ator e diretor argentino, Ricado Darín fala sobre utopias e suas perdas',
+                'duration': 1472.906,
+                'uploader': 'Canal Brasil',
+                'uploader_id': 705,
+                'like_count': int,
+            }
+        },
     ]
 
     class MD5():
@@ -381,11 +394,16 @@ class GloboIE(InfoExtractor):
             signed_md5 = self.MD5.b64_md5(received_md5 + compat_str(sign_time) + padding)
             signed_hash = hash_code + compat_str(received_time) + received_random + compat_str(sign_time) + padding + signed_md5
 
-            formats.append({
-                'url': '%s?h=%s&k=%s' % (resource['url'], signed_hash, 'flash'),
-                'format_id': resource_id,
-                'height': resource['height']
-            })
+            resource_url = resource['url']
+            signed_url = '%s?h=%s&k=%s' % (resource_url, signed_hash, 'flash')
+            if resource_id.endswith('m3u8') or resource_url.endswith('.m3u8'):
+                formats.extend(self._extract_m3u8_formats(signed_url, resource_id, 'mp4'))
+            else:
+                formats.append({
+                    'url': signed_url,
+                    'format_id': resource_id,
+                    'height': resource.get('height'),
+                })
 
         self._sort_formats(formats)
 
index dbfe4cc03fd8c569ed2f05d5ae9c86c36bb9e278..364dc878ee23b98413a7f2c6735124d50d4f487b 100644 (file)
--- a/youtube_dl/extractor/kankan.py
+++ b/youtube_dl/extractor/kankan.py
@@ -13,17 +13,17 @@ class KankanIE(InfoExtractor):
 
     _TEST = {
         'url': 'http://yinyue.kankan.com/vod/48/48863.shtml',
-        'file': '48863.flv',
         'md5': '29aca1e47ae68fc28804aca89f29507e',
         'info_dict': {
+            'id': '48863',
+            'ext': 'flv',
             'title': 'Ready To Go',
         },
         'skip': 'Only available from China',
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
 
         title = self._search_regex(r'(?:G_TITLE=|G_MOVIE_TITLE = )[\'"](.+?)[\'"]', webpage, 'video title')
index 97dcb518a3587406bc93a44c39344630cafe7119..82eddec511850ade9b4786636027597baf75dd29 100644 (file)
--- a/youtube_dl/extractor/keezmovies.py
+++ b/youtube_dl/extractor/keezmovies.py
@@ -7,10 +7,6 @@ from .common import InfoExtractor
 from ..compat import (
     compat_urllib_parse_urlparse,
     compat_urllib_request,
-    compat_urllib_parse,
-)
-from ..aes import (
-    aes_decrypt_text
 )
 
 
@@ -18,9 +14,10 @@ class KeezMoviesIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?keezmovies\.com/video/.+?(?P<id>[0-9]+)(?:[/?&]|$)'
     _TEST = {
         'url': 'http://www.keezmovies.com/video/petite-asian-lady-mai-playing-in-bathtub-1214711',
-        'file': '1214711.mp4',
         'md5': '6e297b7e789329923fcf83abb67c9289',
         'info_dict': {
+            'id': '1214711',
+            'ext': 'mp4',
             'title': 'Petite Asian Lady Mai Playing In Bathtub',
             'age_limit': 18,
         }
@@ -39,11 +36,10 @@ class KeezMoviesIE(InfoExtractor):
             embedded_url = mobj.group(1)
             return self.url_result(embedded_url)
 
-        video_title = self._html_search_regex(r'<h1 [^>]*>([^<]+)', webpage, 'title')
-        video_url = compat_urllib_parse.unquote(self._html_search_regex(r'video_url=(.+?)&amp;', webpage, 'video_url'))
-        if 'encrypted=true' in webpage:
-            password = self._html_search_regex(r'video_title=(.+?)&amp;', webpage, 'password')
-            video_url = aes_decrypt_text(video_url, password, 32).decode('utf-8')
+        video_title = self._html_search_regex(
+            r'<h1 [^>]*>([^<]+)', webpage, 'title')
+        video_url = self._html_search_regex(
+            r'(?s)html5VideoPlayer = .*?src="([^"]+)"', webpage, 'video URL')
         path = compat_urllib_parse_urlparse(video_url).path
         extension = os.path.splitext(path)[1][1:]
         format = path.split('/')[4].split('_')[:2]
index db2028e9f5148d37f69f0d7c4c41fbbb77d88928..b08f6e3c9548de02217e43bebbf20b5f2ab871e8 100644 (file)
--- a/youtube_dl/extractor/la7.py
+++ b/youtube_dl/extractor/la7.py
@@ -1,7 +1,5 @@
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
 from ..utils import (
     parse_duration,
@@ -20,9 +18,10 @@ class LA7IE(InfoExtractor):
 
     _TEST = {
         'url': 'http://www.la7.tv/richplayer/?assetid=50355319',
-        'file': '50355319.mp4',
         'md5': 'ec7d1f0224d20ba293ab56cf2259651f',
         'info_dict': {
+            'id': '50355319',
+            'ext': 'mp4',
             'title': 'IL DIVO',
             'description': 'Un film di Paolo Sorrentino con Toni Servillo, Anna Bonaiuto, Giulio Bosetti  e Flavio Bucci',
             'duration': 6254,
@@ -31,9 +30,7 @@ class LA7IE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-
+        video_id = self._match_id(url)
         xml_url = 'http://www.la7.tv/repliche/content/index.php?contentId=%s' % video_id
         doc = self._download_xml(xml_url, video_id)
 
index b818cf50c85c79865b5afc09090d6261e81d08c6..3cd4a3a192ce3f6b611f6b3f4f3d928b75c9bba0 100644 (file)
--- a/youtube_dl/extractor/macgamestore.py
+++ b/youtube_dl/extractor/macgamestore.py
@@ -1,7 +1,5 @@
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
 from ..utils import ExtractorError
 
@@ -13,21 +11,22 @@ class MacGameStoreIE(InfoExtractor):
 
     _TEST = {
         'url': 'http://www.macgamestore.com/mediaviewer.php?trailer=2450',
-        'file': '2450.m4v',
         'md5': '8649b8ea684b6666b4c5be736ecddc61',
         'info_dict': {
+            'id': '2450',
+            'ext': 'm4v',
             'title': 'Crow',
         }
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-
-        webpage = self._download_webpage(url, video_id, 'Downloading trailer page')
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(
+            url, video_id, 'Downloading trailer page')
 
-        if re.search(r'>Missing Media<', webpage) is not None:
-            raise ExtractorError('Trailer %s does not exist' % video_id, expected=True)
+        if '>Missing Media<' in webpage:
+            raise ExtractorError(
+                'Trailer %s does not exist' % video_id, expected=True)
 
         video_title = self._html_search_regex(
             r'<title>MacGameStore: (.*?) Trailer</title>', webpage, 'title')
index 88c9501cd4e34492003a1fe67923d1d84a9e2d2b..6db3c67a5a471d9cd850ad3bd828a9e2478c00e3 100644 (file)
--- a/youtube_dl/extractor/mpora.py
+++ b/youtube_dl/extractor/mpora.py
@@ -1,21 +1,19 @@
 from __future__ import unicode_literals
 
-import json
-import re
-
 from .common import InfoExtractor
 from ..utils import int_or_none
 
 
 class MporaIE(InfoExtractor):
-    _VALID_URL = r'^https?://(www\.)?mpora\.(?:com|de)/videos/(?P<id>[^?#/]+)'
+    _VALID_URL = r'https?://(www\.)?mpora\.(?:com|de)/videos/(?P<id>[^?#/]+)'
     IE_NAME = 'MPORA'
 
     _TEST = {
         'url': 'http://mpora.de/videos/AAdo8okx4wiz/embed?locale=de',
-        'file': 'AAdo8okx4wiz.mp4',
         'md5': 'a7a228473eedd3be741397cf452932eb',
         'info_dict': {
+            'id': 'AAdo8okx4wiz',
+            'ext': 'mp4',
             'title': 'Katy Curd -  Winter in the Forest',
             'duration': 416,
             'uploader': 'Peter Newman Media',
@@ -23,14 +21,12 @@ class MporaIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        m = re.match(self._VALID_URL, url)
-        video_id = m.group('id')
-
+        video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
+
         data_json = self._search_regex(
             r"new FM\.Player\('[^']+',\s*(\{.*?)\).player;", webpage, 'json')
-
-        data = json.loads(data_json)
+        data = self._parse_json(data_json, video_id)
 
         uploader = data['info_overlay'].get('username')
         duration = data['video']['duration'] // 1000
index 7a3b62ebe7bb56c109ba9dad86fa11112a1f69b0..bc7f49ebbac86cda7aa1bb711076b783e24bfea8 100644 (file)
--- a/youtube_dl/extractor/mtv.py
+++ b/youtube_dl/extractor/mtv.py
@@ -79,12 +79,15 @@ class MTVServicesInfoExtractor(SubtitlesInfoExtractor):
             try:
                 _, _, ext = rendition.attrib['type'].partition('/')
                 rtmp_video_url = rendition.find('./src').text
-                formats.append({'ext': ext,
-                                'url': self._transform_rtmp_url(rtmp_video_url),
-                                'format_id': rendition.get('bitrate'),
-                                'width': int(rendition.get('width')),
-                                'height': int(rendition.get('height')),
-                                })
+                if rtmp_video_url.endswith('siteunavail.png'):
+                    continue
+                formats.append({
+                    'ext': ext,
+                    'url': self._transform_rtmp_url(rtmp_video_url),
+                    'format_id': rendition.get('bitrate'),
+                    'width': int(rendition.get('width')),
+                    'height': int(rendition.get('height')),
+                })
             except (KeyError, TypeError):
                 raise ExtractorError('Invalid rendition field.')
         self._sort_formats(formats)
@@ -240,25 +243,14 @@ class MTVIE(MTVServicesInfoExtractor):
     _TESTS = [
         {
             'url': 'http://www.mtv.com/videos/misc/853555/ours-vh1-storytellers.jhtml',
-            'file': '853555.mp4',
             'md5': '850f3f143316b1e71fa56a4edfd6e0f8',
             'info_dict': {
+                'id': '853555',
+                'ext': 'mp4',
                 'title': 'Taylor Swift - "Ours (VH1 Storytellers)"',
                 'description': 'Album: Taylor Swift performs "Ours" for VH1 Storytellers at Harvey Mudd College.',
             },
         },
-        {
-            'add_ie': ['Vevo'],
-            'url': 'http://www.mtv.com/videos/taylor-swift/916187/everything-has-changed-ft-ed-sheeran.jhtml',
-            'file': 'USCJY1331283.mp4',
-            'md5': '73b4e7fcadd88929292fe52c3ced8caf',
-            'info_dict': {
-                'title': 'Everything Has Changed',
-                'upload_date': '20130606',
-                'uploader': 'Taylor Swift',
-            },
-            'skip': 'VEVO is only available in some countries',
-        },
     ]
 
     def _get_thumbnail_url(self, uri, itemdoc):
@@ -272,8 +264,8 @@ class MTVIE(MTVServicesInfoExtractor):
             webpage = self._download_webpage(url, video_id)
 
             # Some videos come from Vevo.com
-            m_vevo = re.search(r'isVevoVideo = true;.*?vevoVideoId = "(.*?)";',
-                               webpage, re.DOTALL)
+            m_vevo = re.search(
+                r'(?s)isVevoVideo = true;.*?vevoVideoId = "(.*?)";', webpage)
             if m_vevo:
                 vevo_id = m_vevo.group(1)
                 self.to_screen('Vevo video detected: %s' % vevo_id)
index efc903afa93465473621ef89ccc81142320b90ee..dff78e4862390e4e6468a34d804001d2156221a7 100644 (file)
--- a/youtube_dl/extractor/nerdcubed.py
+++ b/youtube_dl/extractor/nerdcubed.py
@@ -11,6 +11,7 @@ class NerdCubedFeedIE(InfoExtractor):
     _TEST = {
         'url': 'http://www.nerdcubed.co.uk/feed.json',
         'info_dict': {
+            'id': 'nerdcubed-feed',
             'title': 'nerdcubed.co.uk feed',
         },
         'playlist_mincount': 1300,
index 59dc137cc225889feb9428dd70f42a91451a951d..efa4afeb6a6615a4fa1e90781f27d3dd65083810 100644 (file)
--- a/youtube_dl/extractor/ringtv.py
+++ b/youtube_dl/extractor/ringtv.py
@@ -6,12 +6,13 @@ from .common import InfoExtractor
 
 
 class RingTVIE(InfoExtractor):
-    _VALID_URL = r'(?:http://)?(?:www\.)?ringtv\.craveonline\.com/(?P<type>news|videos/video)/(?P<id>[^/?#]+)'
+    _VALID_URL = r'http://(?:www\.)?ringtv\.craveonline\.com/(?P<type>news|videos/video)/(?P<id>[^/?#]+)'
     _TEST = {
         "url": "http://ringtv.craveonline.com/news/310833-luis-collazo-says-victor-ortiz-better-not-quit-on-jan-30",
-        "file": "857645.mp4",
         "md5": "d25945f5df41cdca2d2587165ac28720",
         "info_dict": {
+            'id': '857645',
+            'ext': 'mp4',
             "title": 'Video: Luis Collazo says Victor Ortiz "better not quit on Jan. 30" - Ring TV',
             "description": 'Luis Collazo is excited about his Jan. 30 showdown with fellow former welterweight titleholder Victor Ortiz at Barclays Center in his hometown of Brooklyn. The SuperBowl week fight headlines a Golden Boy Live! card on Fox Sports 1.',
         }
index c1500b82feb83c419bdb3141d6fc3a5bcd90d90d..e8bb20a0803700937875355d2f854d1de88cea1a 100644 (file)
--- a/youtube_dl/extractor/rottentomatoes.py
+++ b/youtube_dl/extractor/rottentomatoes.py
@@ -10,8 +10,9 @@ class RottenTomatoesIE(VideoDetectiveIE):
 
     _TEST = {
         'url': 'http://www.rottentomatoes.com/m/toy_story_3/trailers/11028566/',
-        'file': '613340.mp4',
         'info_dict': {
+            'id': '613340',
+            'ext': 'mp4',
             'title': 'TOY STORY 3',
             'description': 'From the creators of the beloved TOY STORY films, comes a story that will reunite the gang in a whole new way.',
         },
index a73e6f331fc02a8977863a412227681b3838b91a..ef766237bf318d40da067a6a820a725fbe0da286 100644 (file)
--- a/youtube_dl/extractor/rutv.py
+++ b/youtube_dl/extractor/rutv.py
@@ -162,10 +162,8 @@ class RUTVIE(InfoExtractor):
                         'vbr': int(quality),
                     }
                 elif transport == 'm3u8':
-                    fmt = {
-                        'url': url,
-                        'ext': 'mp4',
-                    }
+                    formats.extend(self._extract_m3u8_formats(url, video_id, 'mp4'))
+                    continue
                 else:
                     fmt = {
                         'url': url
index 16dc3736b48bfb15a94b98713beef4757446b642..c013d678f70f36f8589007cf1b6cc14d036cde21 100644 (file)
--- a/youtube_dl/extractor/servingsys.py
+++ b/youtube_dl/extractor/servingsys.py
@@ -1,7 +1,5 @@
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
 from ..utils import (
     int_or_none,
@@ -13,10 +11,15 @@ class ServingSysIE(InfoExtractor):
 
     _TEST = {
         'url': 'http://bs.serving-sys.com/BurstingPipe/adServer.bs?cn=is&c=23&pl=VAST&pli=5349193&PluID=0&pos=7135&ord=[timestamp]&cim=1?',
+        'info_dict': {
+            'id': '5349193',
+            'title': 'AdAPPter_Hyundai_demo',
+        },
         'playlist': [{
-            'file': '29955898.flv',
             'md5': 'baed851342df6846eb8677a60a011a0f',
             'info_dict': {
+                'id': '29955898',
+                'ext': 'flv',
                 'title': 'AdAPPter_Hyundai_demo (1)',
                 'duration': 74,
                 'tbr': 1378,
@@ -24,9 +27,10 @@ class ServingSysIE(InfoExtractor):
                 'height': 400,
             },
         }, {
-            'file': '29907998.flv',
             'md5': '979b4da2655c4bc2d81aeb915a8c5014',
             'info_dict': {
+                'id': '29907998',
+                'ext': 'flv',
                 'title': 'AdAPPter_Hyundai_demo (2)',
                 'duration': 34,
                 'width': 854,
@@ -37,14 +41,13 @@ class ServingSysIE(InfoExtractor):
         'params': {
             'playlistend': 2,
         },
-        'skip': 'Blocked in the US [sic]',
+        '_skip': 'Blocked in the US [sic]',
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        pl_id = mobj.group('id')
-
+        pl_id = self._match_id(url)
         vast_doc = self._download_xml(url, pl_id)
+
         title = vast_doc.find('.//AdTitle').text
         media = vast_doc.find('.//MediaFile').text
         info_url = self._search_regex(r'&adData=([^&]+)&', media, 'info URL')
index a63d126d4560dda83133fa6280116ca517e71bdc..0891a441f85f42b75d91f1d267fabdd1b5e952ce 100644 (file)
--- a/youtube_dl/extractor/sina.py
+++ b/youtube_dl/extractor/sina.py
@@ -11,7 +11,7 @@ from ..compat import (
 
 
 class SinaIE(InfoExtractor):
-    _VALID_URL = r'''https?://(.*?\.)?video\.sina\.com\.cn/
+    _VALID_URL = r'''(?x)https?://(.*?\.)?video\.sina\.com\.cn/
                         (
                             (.+?/(((?P<pseudo_id>\d+).html)|(.*?(\#|(vid=)|b/)(?P<id>\d+?)($|&|\-))))
                             |
@@ -23,9 +23,10 @@ class SinaIE(InfoExtractor):
     _TESTS = [
         {
             'url': 'http://video.sina.com.cn/news/vlist/zt/chczlj2013/?opsubject_id=top12#110028898',
-            'file': '110028898.flv',
             'md5': 'd65dd22ddcf44e38ce2bf58a10c3e71f',
             'info_dict': {
+                'id': '110028898',
+                'ext': 'flv',
                 'title': '《中国新闻》 朝鲜要求巴拿马立即释放被扣船员',
             }
         },
@@ -39,10 +40,6 @@ class SinaIE(InfoExtractor):
         },
     ]
 
-    @classmethod
-    def suitable(cls, url):
-        return re.match(cls._VALID_URL, url, flags=re.VERBOSE) is not None
-
     def _extract_video(self, video_id):
         data = compat_urllib_parse.urlencode({'vid': video_id})
         url_doc = self._download_xml('http://v.iask.com/v_play.php?%s' % data,
@@ -59,7 +56,7 @@ class SinaIE(InfoExtractor):
                 }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE)
+        mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('id')
         if mobj.group('token') is not None:
             # The video id is in the redirected url
index 5d60c4939588ad543840b501ef0e552ad0b1e673..c5284fa673b7eda4f74191fba6a788df39939a51 100644 (file)
--- a/youtube_dl/extractor/soundcloud.py
+++ b/youtube_dl/extractor/soundcloud.py
@@ -246,6 +246,7 @@ class SoundcloudSetIE(SoundcloudIE):
     _TESTS = [{
         'url': 'https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep',
         'info_dict': {
+            'id': '2284613',
             'title': 'The Royal Concept EP',
         },
         'playlist_mincount': 6,
@@ -279,7 +280,7 @@ class SoundcloudSetIE(SoundcloudIE):
         return {
             '_type': 'playlist',
             'entries': [self._extract_info_dict(track, secret_token=token) for track in info['tracks']],
-            'id': info['id'],
+            'id': '%s' % info['id'],
             'title': info['title'],
         }
 
index 5fa67eb8d4441d62c1591289551171cdbcbcf45b..18a8237197ca4f017252fa181b08bfacf67c44b2 100644 (file)
--- a/youtube_dl/extractor/teamcoco.py
+++ b/youtube_dl/extractor/teamcoco.py
@@ -10,17 +10,19 @@ class TeamcocoIE(InfoExtractor):
     _TESTS = [
         {
             'url': 'http://teamcoco.com/video/80187/conan-becomes-a-mary-kay-beauty-consultant',
-            'file': '80187.mp4',
             'md5': '3f7746aa0dc86de18df7539903d399ea',
             'info_dict': {
+                'id': '80187',
+                'ext': 'mp4',
                 'title': 'Conan Becomes A Mary Kay Beauty Consultant',
                 'description': 'Mary Kay is perhaps the most trusted name in female beauty, so of course Conan is a natural choice to sell their products.'
             }
         }, {
             'url': 'http://teamcoco.com/video/louis-ck-interview-george-w-bush',
-            'file': '19705.mp4',
             'md5': 'cde9ba0fa3506f5f017ce11ead928f9a',
             'info_dict': {
+                'id': '19705',
+                'ext': 'mp4',
                 "description": "Louis C.K. got starstruck by George W. Bush, so what? Part one.",
                 "title": "Louis C.K. Interview Pt. 1 11/3/11"
             }
@@ -36,7 +38,7 @@ class TeamcocoIE(InfoExtractor):
         video_id = mobj.group("video_id")
         if not video_id:
             video_id = self._html_search_regex(
-                r'data-node-id="(\d+?)"',
+                r'<div\s+class="player".*?data-id="(\d+?)"',
                 webpage, 'video id')
 
         data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id
index e54145105f45d6b15345b47f101fe7f804ec174d..b9e2ef8cab9a0e3a63bf5a46baf2b70a39afa1cb 100644 (file)
--- a/youtube_dl/extractor/teletask.py
+++ b/youtube_dl/extractor/teletask.py
@@ -11,6 +11,7 @@ class TeleTaskIE(InfoExtractor):
     _TEST = {
         'url': 'http://www.tele-task.de/archive/video/html5/26168/',
         'info_dict': {
+            'id': '26168',
             'title': 'Duplicate Detection',
         },
         'playlist': [{
@@ -34,7 +35,6 @@ class TeleTaskIE(InfoExtractor):
 
     def _real_extract(self, url):
         lecture_id = self._match_id(url)
-
         webpage = self._download_webpage(url, lecture_id)
 
         title = self._html_search_regex(
index 2837f9c8e5fcf9624acc31c156f775aad32454dd..4797d1310aaeec2664d822c052f26be5ea5210af 100644 (file)
--- a/youtube_dl/extractor/toutv.py
+++ b/youtube_dl/extractor/toutv.py
@@ -16,8 +16,9 @@ class TouTvIE(InfoExtractor):
 
     _TEST = {
         'url': 'http://www.tou.tv/30-vies/S04E41',
-        'file': '30-vies_S04E41.mp4',
         'info_dict': {
+            'id': '30-vies_S04E41',
+            'ext': 'mp4',
             'title': '30 vies Saison 4 / Épisode 41',
             'description': 'md5:da363002db82ccbe4dafeb9cab039b09',
             'age_limit': 8,
index 06b0bed41e68401a8667cbabdca0d9796ea8ca3d..1bb47351435bd48832671b84038b6c4a749cdfbc 100644 (file)
--- a/youtube_dl/extractor/vimeo.py
+++ b/youtube_dl/extractor/vimeo.py
@@ -501,9 +501,10 @@ class VimeoReviewIE(InfoExtractor):
     _VALID_URL = r'https?://vimeo\.com/[^/]+/review/(?P<id>[^/]+)'
     _TESTS = [{
         'url': 'https://vimeo.com/user21297594/review/75524534/3c257a1b5d',
-        'file': '75524534.mp4',
         'md5': 'c507a72f780cacc12b2248bb4006d253',
         'info_dict': {
+            'id': '75524534',
+            'ext': 'mp4',
             'title': "DICK HARDWICK 'Comedian'",
             'uploader': 'Richard Hardwick',
         }
index 313b9c15ddc576c226e1f4b8ee211bc816e2ebe9..c9048850061e1ecae4380557503a6b3927d2220c 100644 (file)
--- a/youtube_dl/extractor/wdr.py
+++ b/youtube_dl/extractor/wdr.py
@@ -71,6 +71,9 @@ class WDRIE(InfoExtractor):
         {
             'url': 'http://www1.wdr.de/mediathek/video/sendungen/quarks_und_co/filterseite-quarks-und-co100.html',
             'playlist_mincount': 146,
+            'info_dict': {
+                'id': 'mediathek/video/sendungen/quarks_und_co/filterseite-quarks-und-co100',
+            }
         }
     ]
 
index 3b3678c6e638d29000dd6728358c76b319af4b9f..e4b26b84fe5cf65dfdcedc5d9fd9bf2b67e17f35 100644 (file)
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1160,6 +1160,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
     }, {
         'url': 'https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
         'info_dict': {
+            'id': 'PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
             'title': 'YDL_Empty_List',
         },
         'playlist_count': 0,
@@ -1168,6 +1169,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
         'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
         'info_dict': {
             'title': '29C3: Not my department',
+            'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
         },
         'playlist_count': 95,
     }, {
@@ -1175,6 +1177,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
         'url': 'PLBB231211A4F62143',
         'info_dict': {
             'title': '[OLD]Team Fortress 2 (Class-based LP)',
+            'id': 'PLBB231211A4F62143',
         },
         'playlist_mincount': 26,
     }, {
@@ -1182,12 +1185,14 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
         'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
         'info_dict': {
             'title': 'Uploads from Cauchemar',
+            'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
         },
         'playlist_mincount': 799,
     }, {
         'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
         'info_dict': {
             'title': 'YDL_safe_search',
+            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
         },
         'playlist_count': 2,
     }, {
@@ -1196,6 +1201,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
         'playlist_count': 4,
         'info_dict': {
             'title': 'JODA15',
+            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
         }
     }, {
         'note': 'Embedded SWF player',
@@ -1203,12 +1209,14 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
         'playlist_count': 4,
         'info_dict': {
             'title': 'JODA7',
+            'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ',
         }
     }, {
         'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
         'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
         'info_dict': {
-                'title': 'Uploads from Interstellar Movie',
+            'title': 'Uploads from Interstellar Movie',
+            'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
         },
         'playlist_mincout': 21,
     }]
@@ -1314,6 +1322,9 @@ class YoutubeChannelIE(InfoExtractor):
         'note': 'paginated channel',
         'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
         'playlist_mincount': 91,
+        'info_dict': {
+            'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
+        }
     }]
 
     def extract_videos_from_page(self, page):
index b4617fbad0fc40323a129ce1218f9f97590c89bb..453e2732cc4faa453a98b153356c2188feef1d35 100644 (file)
--- a/youtube_dl/jsinterp.py
+++ b/youtube_dl/jsinterp.py
 from __future__ import unicode_literals
 
 import json
+import operator
 import re
 
 from .utils import (
     ExtractorError,
 )
 
+_OPERATORS = [
+    ('|', operator.or_),
+    ('^', operator.xor),
+    ('&', operator.and_),
+    ('>>', operator.rshift),
+    ('<<', operator.lshift),
+    ('-', operator.sub),
+    ('+', operator.add),
+    ('%', operator.mod),
+    ('/', operator.truediv),
+    ('*', operator.mul),
+]
+_ASSIGN_OPERATORS = [(op + '=', opfunc) for op, opfunc in _OPERATORS]
+_ASSIGN_OPERATORS.append(('=', lambda cur, right: right))
+
+_NAME_RE = r'[a-zA-Z_$][a-zA-Z_$0-9]*'
+
 
 class JSInterpreter(object):
-    def __init__(self, code):
-        self.code = code
+    def __init__(self, code, objects=None):
+        if objects is None:
+            objects = {}
+        self.code = self._remove_comments(code)
         self._functions = {}
-        self._objects = {}
+        self._objects = objects
+
+    def _remove_comments(self, code):
+        return re.sub(r'(?s)/\*.*?\*/', '', code)
 
-    def interpret_statement(self, stmt, local_vars, allow_recursion=20):
+    def interpret_statement(self, stmt, local_vars, allow_recursion=100):
         if allow_recursion < 0:
             raise ExtractorError('Recursion limit reached')
 
-        if stmt.startswith('var '):
-            stmt = stmt[len('var '):]
-        ass_m = re.match(r'^(?P<out>[a-z]+)(?:\[(?P<index>[^\]]+)\])?' +
-                         r'=(?P<expr>.*)$', stmt)
-        if ass_m:
-            if ass_m.groupdict().get('index'):
-                def assign(val):
-                    lvar = local_vars[ass_m.group('out')]
-                    idx = self.interpret_expression(
-                        ass_m.group('index'), local_vars, allow_recursion)
-                    assert isinstance(idx, int)
-                    lvar[idx] = val
-                    return val
-                expr = ass_m.group('expr')
-            else:
-                def assign(val):
-                    local_vars[ass_m.group('out')] = val
-                    return val
-                expr = ass_m.group('expr')
-        elif stmt.startswith('return '):
-            assign = lambda v: v
-            expr = stmt[len('return '):]
+        should_abort = False
+        stmt = stmt.lstrip()
+        stmt_m = re.match(r'var\s', stmt)
+        if stmt_m:
+            expr = stmt[len(stmt_m.group(0)):]
         else:
-            # Try interpreting it as an expression
-            expr = stmt
-            assign = lambda v: v
+            return_m = re.match(r'return(?:\s+|$)', stmt)
+            if return_m:
+                expr = stmt[len(return_m.group(0)):]
+                should_abort = True
+            else:
+                # Try interpreting it as an expression
+                expr = stmt
 
         v = self.interpret_expression(expr, local_vars, allow_recursion)
-        return assign(v)
+        return v, should_abort
 
     def interpret_expression(self, expr, local_vars, allow_recursion):
+        expr = expr.strip()
+
+        if expr == '':  # Empty expression
+            return None
+
+        if expr.startswith('('):
+            parens_count = 0
+            for m in re.finditer(r'[()]', expr):
+                if m.group(0) == '(':
+                    parens_count += 1
+                else:
+                    parens_count -= 1
+                    if parens_count == 0:
+                        sub_expr = expr[1:m.start()]
+                        sub_result = self.interpret_expression(
+                            sub_expr, local_vars, allow_recursion)
+                        remaining_expr = expr[m.end():].strip()
+                        if not remaining_expr:
+                            return sub_result
+                        else:
+                            expr = json.dumps(sub_result) + remaining_expr
+                        break
+            else:
+                raise ExtractorError('Premature end of parens in %r' % expr)
+
+        for op, opfunc in _ASSIGN_OPERATORS:
+            m = re.match(r'''(?x)
+                (?P<out>%s)(?:\[(?P<index>[^\]]+?)\])?
+                \s*%s
+                (?P<expr>.*)$''' % (_NAME_RE, re.escape(op)), expr)
+            if not m:
+                continue
+            right_val = self.interpret_expression(
+                m.group('expr'), local_vars, allow_recursion - 1)
+
+            if m.groupdict().get('index'):
+                lvar = local_vars[m.group('out')]
+                idx = self.interpret_expression(
+                    m.group('index'), local_vars, allow_recursion)
+                assert isinstance(idx, int)
+                cur = lvar[idx]
+                val = opfunc(cur, right_val)
+                lvar[idx] = val
+                return val
+            else:
+                cur = local_vars.get(m.group('out'))
+                val = opfunc(cur, right_val)
+                local_vars[m.group('out')] = val
+                return val
+
         if expr.isdigit():
             return int(expr)
 
-        if expr.isalpha():
-            return local_vars[expr]
+        var_m = re.match(
+            r'(?!if|return|true|false)(?P<name>%s)$' % _NAME_RE,
+            expr)
+        if var_m:
+            return local_vars[var_m.group('name')]
 
         try:
             return json.loads(expr)
@@ -61,7 +124,7 @@ class JSInterpreter(object):
             pass
 
         m = re.match(
-            r'^(?P<var>[$a-zA-Z0-9_]+)\.(?P<member>[^(]+)(?:\(+(?P<args>[^()]*)\))?$',
+            r'(?P<var>%s)\.(?P<member>[^(]+)(?:\(+(?P<args>[^()]*)\))?$' % _NAME_RE,
             expr)
         if m:
             variable = m.group('var')
@@ -114,23 +177,31 @@ class JSInterpreter(object):
             return obj[member](argvals)
 
         m = re.match(
-            r'^(?P<in>[a-z]+)\[(?P<idx>.+)\]$', expr)
+            r'(?P<in>%s)\[(?P<idx>.+)\]$' % _NAME_RE, expr)
         if m:
             val = local_vars[m.group('in')]
             idx = self.interpret_expression(
                 m.group('idx'), local_vars, allow_recursion - 1)
             return val[idx]
 
-        m = re.match(r'^(?P<a>.+?)(?P<op>[%])(?P<b>.+?)$', expr)
-        if m:
-            a = self.interpret_expression(
-                m.group('a'), local_vars, allow_recursion)
-            b = self.interpret_expression(
-                m.group('b'), local_vars, allow_recursion)
-            return a % b
+        for op, opfunc in _OPERATORS:
+            m = re.match(r'(?P<x>.+?)%s(?P<y>.+)' % re.escape(op), expr)
+            if not m:
+                continue
+            x, abort = self.interpret_statement(
+                m.group('x'), local_vars, allow_recursion - 1)
+            if abort:
+                raise ExtractorError(
+                    'Premature left-side return of %s in %r' % (op, expr))
+            y, abort = self.interpret_statement(
+                m.group('y'), local_vars, allow_recursion - 1)
+            if abort:
+                raise ExtractorError(
+                    'Premature right-side return of %s in %r' % (op, expr))
+            return opfunc(x, y)
 
         m = re.match(
-            r'^(?P<func>[a-zA-Z$]+)\((?P<args>[a-z0-9,]+)\)$', expr)
+            r'^(?P<func>%s)\((?P<args>[a-zA-Z0-9_$,]+)\)$' % _NAME_RE, expr)
         if m:
             fname = m.group('func')
             argvals = tuple([
@@ -139,6 +210,7 @@ class JSInterpreter(object):
             if fname not in self._functions:
                 self._functions[fname] = self.extract_function(fname)
             return self._functions[fname](argvals)
+
         raise ExtractorError('Unsupported JS expression %r' % expr)
 
     def extract_object(self, objname):
@@ -162,9 +234,11 @@ class JSInterpreter(object):
 
     def extract_function(self, funcname):
         func_m = re.search(
-            (r'(?:function %s|[{;]%s\s*=\s*function)' % (
-                re.escape(funcname), re.escape(funcname))) +
-            r'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}',
+            r'''(?x)
+                (?:function\s+%s|[{;]%s\s*=\s*function)\s*
+                \((?P<args>[^)]*)\)\s*
+                \{(?P<code>[^}]+)\}''' % (
+                re.escape(funcname), re.escape(funcname)),
             self.code)
         if func_m is None:
             raise ExtractorError('Could not find JS function %r' % funcname)
@@ -172,10 +246,16 @@ class JSInterpreter(object):
 
         return self.build_function(argnames, func_m.group('code'))
 
+    def call_function(self, funcname, *args):
+        f = self.extract_function(funcname)
+        return f(args)
+
     def build_function(self, argnames, code):
         def resf(args):
             local_vars = dict(zip(argnames, args))
             for stmt in code.split(';'):
-                res = self.interpret_statement(stmt, local_vars)
+                res, abort = self.interpret_statement(stmt, local_vars)
+                if abort:
+                    break
             return res
         return resf
index a4c9813ecce0aa9ecef34ad4c3185ab2e75ce3ae..4ade0554e33597ce8aa965e147d2d73cce6a9fab 100644 (file)
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -32,6 +32,7 @@ import xml.etree.ElementTree
 import zlib
 
 from .compat import (
+    compat_basestring,
     compat_chr,
     compat_getenv,
     compat_html_entities,
@@ -140,7 +141,7 @@ else:
     def find_xpath_attr(node, xpath, key, val):
         # Here comes the crazy part: In 2.6, if the xpath is a unicode,
         # .//node does not match if a node is a direct child of . !
-        if isinstance(xpath, unicode):
+        if isinstance(xpath, compat_str):
             xpath = xpath.encode('ascii')
 
         for f in node.findall(xpath):
@@ -1262,7 +1263,7 @@ def float_or_none(v, scale=1, invscale=1, default=None):
 
 
 def parse_duration(s):
-    if not isinstance(s, basestring if sys.version_info < (3, 0) else compat_str):
+    if not isinstance(s, compat_basestring):
         return None
 
     s = s.strip()
@@ -1426,7 +1427,7 @@ def uppercase_escape(s):
 
 def escape_rfc3986(s):
     """Escape non-ASCII characters as suggested by RFC 3986"""
-    if sys.version_info < (3, 0) and isinstance(s, unicode):
+    if sys.version_info < (3, 0) and isinstance(s, compat_str):
         s = s.encode('utf-8')
     return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
 
index 5e6288e8e33f35e03b94deb47fb3b86d3bc43589..1e469d93be4943606c16b981b6b99a5ef5a54d8e 100644 (file)
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,3 +1,3 @@
 from __future__ import unicode_literals
 
-__version__ = '2015.01.30.2'
+__version__ = '2015.02.02'