Merge branch 'youtube-dash' of github.com:m0vie/youtube-dl

author Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>

Fri, 21 Feb 2014 14:02:47 +0000 (15:02 +0100)

committer Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>

Fri, 21 Feb 2014 14:02:47 +0000 (15:02 +0100)
author Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>
Fri, 21 Feb 2014 14:02:47 +0000 (15:02 +0100)
committer Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>
Fri, 21 Feb 2014 14:02:47 +0000 (15:02 +0100)
diff --combined youtube_dl/extractor/youtube.py

index 8e768ea4fbad4cf7bd58703e43e853e478d8c450,5b0d30ed1f426114bf5047997496ce1c08baa923..49cca4c6306cd379acc59ab5e2cf1633e34f7afc
--- 1/youtube_dl/extractor/youtube.py
--- 2/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@@ -1366,12 -1366,26 +1366,26 @@@ class YoutubeIE(YoutubeBaseInfoExtracto
               raise ExtractorError(u'no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
   
           # Look for the DASH manifest
-         dash_manifest_url_lst = video_info.get('dashmpd')
-         if (dash_manifest_url_lst and dash_manifest_url_lst[0] and
-                 self._downloader.params.get('youtube_include_dash_manifest', False)):
+         if (self._downloader.params.get('youtube_include_dash_manifest', False)):
               try:
+                 # The DASH manifest used needs to be the one from the original video_webpage.
+                 # The one found in get_video_info seems to be using different signatures.
+                 # However, in the case of an age restriction there won't be any embedded dashmpd in the video_webpage.
+                 # Luckily, it seems, this case uses some kind of default signature (len == 86), so the
+                 # combination of get_video_info and the _static_decrypt_signature() decryption fallback will work here.
+                 if age_gate:
+                     dash_manifest_url = video_info.get('dashmpd')[0];
+                 else:
+                     x = re.search(r'ytplayer\.config = ({.*});', video_webpage)
+                     x = json.loads(x.group(1));
+                     dash_manifest_url = x['args']['dashmpd']
+                 def decrypt_sig(mobj):
+                     s = mobj.group(1)
+                     dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
+                     return '/signature/%s' % dec_s
+                 dash_manifest_url = re.sub(r'/s/([\w\.]+)', decrypt_sig, dash_manifest_url)
                   dash_doc = self._download_xml(
-                     dash_manifest_url_lst[0], video_id,
+                     dash_manifest_url, video_id,
                       note=u'Downloading DASH manifest',
                       errnote=u'Could not download DASH manifest')
                   for r in dash_doc.findall(u'.//{urn:mpeg:DASH:schema:MPD:2011}Representation'):
@@@ -1443,9 -1457,9 +1457,9 @@@ class YoutubePlaylistIE(YoutubeBaseInfo
                        |
                           ((?:PL|EC|UU|FL|RD)[0-9A-Za-z-_]{10,})
                        )"""
- -    _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s&page=%s'
+ +    _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
       _MORE_PAGES_INDICATOR = r'data-link-type="next"'
- -    _VIDEO_RE = r'href="/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&amp;[^"]*?index=(?P<index>\d+)'
+ +    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&amp;[^"]*?index=(?P<index>\d+)'
       IE_NAME = u'youtube:playlist'
   
       def _real_initialize(self):
@@@ -1493,31 -1507,29 +1507,31 @@@
               raise ExtractorError(u'For downloading YouTube.com top lists, use '
                   u'the "yttoplist" keyword, for example "youtube-dl \'yttoplist:music:Top Tracks\'"', expected=True)
   
+ +        url = self._TEMPLATE_URL % playlist_id
+ +        page = self._download_webpage(url, playlist_id)
+ +        more_widget_html = content_html = page
+ +
           # Extract the video ids from the playlist pages
           ids = []
   
           for page_num in itertools.count(1):
- -            url = self._TEMPLATE_URL % (playlist_id, page_num)
- -            page = self._download_webpage(url, playlist_id, u'Downloading page #%s' % page_num)
- -            matches = re.finditer(self._VIDEO_RE, page)
+ +            matches = re.finditer(self._VIDEO_RE, content_html)
               # We remove the duplicates and the link with index 0
               # (it's not the first video of the playlist)
               new_ids = orderedSet(m.group('id') for m in matches if m.group('index') != '0')
               ids.extend(new_ids)
   
- -            if re.search(self._MORE_PAGES_INDICATOR, page) is None:
+ +            mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
+ +            if not mobj:
                   break
   
- -        try:
- -            playlist_title = self._og_search_title(page)
- -        except RegexNotFoundError:
- -            self.report_warning(
- -                u'Playlist page is missing OpenGraph title, falling back ...',
- -                playlist_id)
- -            playlist_title = self._html_search_regex(
- -                r'<h1 class="pl-header-title">(.*?)</h1>', page, u'title')
+ +            more = self._download_json(
+ +                'https://youtube.com/%s' % mobj.group('more'), playlist_id, 'Downloading page #%s' % page_num)
+ +            content_html = more['content_html']
+ +            more_widget_html = more['load_more_widget_html']
+ +
+ +        playlist_title = self._html_search_regex(
+ +                r'<h1 class="pl-header-title">\s*(.*?)\s*</h1>', page, u'title')
   
           url_results = self._ids_to_results(ids)
           return self.playlist_result(url_results, playlist_id, playlist_title)
author	Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>
	Fri, 21 Feb 2014 14:02:47 +0000 (15:02 +0100)
committer	Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>
	Fri, 21 Feb 2014 14:02:47 +0000 (15:02 +0100)