Merge branch 'youtube-dash' of github.com:m0vie/youtube-dl
author Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>
Fri, 21 Feb 2014 14:02:47 +0000 (15:02 +0100)
committer Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>
Fri, 21 Feb 2014 14:02:47 +0000 (15:02 +0100)
1  2 
youtube_dl/extractor/youtube.py

index 8e768ea4fbad4cf7bd58703e43e853e478d8c450,5b0d30ed1f426114bf5047997496ce1c08baa923..49cca4c6306cd379acc59ab5e2cf1633e34f7afc
@@@ -1366,12 -1366,26 +1366,26 @@@ class YoutubeIE(YoutubeBaseInfoExtracto
              raise ExtractorError(u'no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
  
          # Look for the DASH manifest
-         dash_manifest_url_lst = video_info.get('dashmpd')
-         if (dash_manifest_url_lst and dash_manifest_url_lst[0] and
-                 self._downloader.params.get('youtube_include_dash_manifest', False)):
+         if (self._downloader.params.get('youtube_include_dash_manifest', False)):
              try:
+                 # The DASH manifest used needs to be the one from the original video_webpage.
+                 # The one found in get_video_info seems to be using different signatures.
+                 # However, in the case of an age restriction there won't be any embedded dashmpd in the video_webpage.
+                 # Luckily, it seems, this case uses some kind of default signature (len == 86), so the
+                 # combination of get_video_info and the _static_decrypt_signature() decryption fallback will work here.
+                 if age_gate:
+                     dash_manifest_url = video_info.get('dashmpd')[0];
+                 else:
+                     x = re.search(r'ytplayer\.config = ({.*});', video_webpage)
+                     x = json.loads(x.group(1));
+                     dash_manifest_url = x['args']['dashmpd']
+                 def decrypt_sig(mobj):
+                     s = mobj.group(1)
+                     dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
+                     return '/signature/%s' % dec_s
+                 dash_manifest_url = re.sub(r'/s/([\w\.]+)', decrypt_sig, dash_manifest_url)
                  dash_doc = self._download_xml(
-                     dash_manifest_url_lst[0], video_id,
+                     dash_manifest_url, video_id,
                      note=u'Downloading DASH manifest',
                      errnote=u'Could not download DASH manifest')
                  for r in dash_doc.findall(u'.//{urn:mpeg:DASH:schema:MPD:2011}Representation'):
@@@ -1443,9 -1457,9 +1457,9 @@@ class YoutubePlaylistIE(YoutubeBaseInfo
                       |
                          ((?:PL|EC|UU|FL|RD)[0-9A-Za-z-_]{10,})
                       )"""
 -    _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s&page=%s'
 +    _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
      _MORE_PAGES_INDICATOR = r'data-link-type="next"'
 -    _VIDEO_RE = r'href="/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&amp;[^"]*?index=(?P<index>\d+)'
 +    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&amp;[^"]*?index=(?P<index>\d+)'
      IE_NAME = u'youtube:playlist'
  
      def _real_initialize(self):
              raise ExtractorError(u'For downloading YouTube.com top lists, use '
                  u'the "yttoplist" keyword, for example "youtube-dl \'yttoplist:music:Top Tracks\'"', expected=True)
  
 +        url = self._TEMPLATE_URL % playlist_id
 +        page = self._download_webpage(url, playlist_id)
 +        more_widget_html = content_html = page
 +
          # Extract the video ids from the playlist pages
          ids = []
  
          for page_num in itertools.count(1):
 -            url = self._TEMPLATE_URL % (playlist_id, page_num)
 -            page = self._download_webpage(url, playlist_id, u'Downloading page #%s' % page_num)
 -            matches = re.finditer(self._VIDEO_RE, page)
 +            matches = re.finditer(self._VIDEO_RE, content_html)
              # We remove the duplicates and the link with index 0
              # (it's not the first video of the playlist)
              new_ids = orderedSet(m.group('id') for m in matches if m.group('index') != '0')
              ids.extend(new_ids)
  
 -            if re.search(self._MORE_PAGES_INDICATOR, page) is None:
 +            mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
 +            if not mobj:
                  break
  
 -        try:
 -            playlist_title = self._og_search_title(page)
 -        except RegexNotFoundError:
 -            self.report_warning(
 -                u'Playlist page is missing OpenGraph title, falling back ...',
 -                playlist_id)
 -            playlist_title = self._html_search_regex(
 -                r'<h1 class="pl-header-title">(.*?)</h1>', page, u'title')
 +            more = self._download_json(
 +                'https://youtube.com/%s' % mobj.group('more'), playlist_id, 'Downloading page #%s' % page_num)
 +            content_html = more['content_html']
 +            more_widget_html = more['load_more_widget_html']
 +
 +        playlist_title = self._html_search_regex(
 +                r'<h1 class="pl-header-title">\s*(.*?)\s*</h1>', page, u'title')
  
          url_results = self._ids_to_results(ids)
          return self.playlist_result(url_results, playlist_id, playlist_title)