Merge branch 'master' of https://github.com/linhua55/youtube-dl into linhua55-master

[youtube-dl] / youtube_dl / extractor / tudou.py
diff --git a/youtube_dl/extractor/tudou.py b/youtube_dl/extractor/tudou.py
index c89de5ba4a46bb261987d8dbee5f55b3d05492da..e800477e2c1014593df0763b9806c21ff73dd3f9 100644
--- a/youtube_dl/extractor/tudou.py
+++ b/youtube_dl/extractor/tudou.py
@@ -9,7 +9,7 @@ from .common import InfoExtractor
  
  
  class TudouIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?tudou\.com/(?:listplay|programs(?:/view)?|albumplay)/.*?/(?P<id>[^/?#]+?)(?:\.html)?/?(?:$|[?#])'
+    _VALID_URL = r'https?://(?:www\.)?tudou\.com/(?:listplay|programs(?:/view)?|albumplay)/?.*/(?P<id>[^/?#]+?)(?:\.html)?/?(?:$|[?#])'
      _TESTS = [{
          'url': 'http://www.tudou.com/listplay/zzdE77v6Mmo/2xN2duXMxmw.html',
          'md5': '140a49ed444bd22f93330985d8475fcb',
@@ -29,6 +29,8 @@ class TudouIE(InfoExtractor):
          }
      }]
  
+    _PLAYER_URL = 'http://js.tudouui.com/bin/lingtong/PortalPlayer_177.swf'
+
      def _url_for_id(self, id, quality=None):
          info_url = "http://v2.tudou.com/f?id=" + str(id)
          if quality:
@@ -54,6 +56,10 @@ class TudouIE(InfoExtractor):
          thumbnail_url = self._search_regex(
              r",pic:\s*[\"'](.+?)[\"']", webpage, 'thumbnail URL', fatal=False)
  
+        player_url = self._search_regex(
+            r"playerUrl\s*:\s*['\"](.+?\.swf)[\"']",
+            webpage, 'player URL', default=self._PLAYER_URL)
+
          segs_json = self._search_regex(r'segs: \'(.*)\'', webpage, 'segments')
          segments = json.loads(segs_json)
          # It looks like the keys are the arguments that have to be passed as
@@ -76,6 +82,9 @@ class TudouIE(InfoExtractor):
                  'ext': ext,
                  'title': title,
                  'thumbnail': thumbnail_url,
+                'http_headers': {
+                    'Referer': player_url,
+                },
              }
              result.append(part_info)
  
              }
              result.append(part_info)