[YoutubeDL] Support DASH manifest downloading
[youtube-dl] / youtube_dl / extractor / youtube.py
index fcdbfe0bc959a011bebf8656184fe164b3eca84a..5d1297e0d27260dd1e0f389d5add17061fad0644 100644 (file)
@@ -802,6 +802,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                     # TODO implement WebVTT downloading
                     pass
                 elif mime_type.startswith('audio/') or mime_type.startswith('video/'):
+                    segment_list = r.find('{urn:mpeg:DASH:schema:MPD:2011}SegmentList')
                     format_id = r.attrib['id']
                     video_url = url_el.text
                     filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength'))
@@ -815,6 +816,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                         'filesize': filesize,
                         'fps': int_or_none(r.attrib.get('frameRate')),
                     }
+                    if segment_list is not None:  # find() returns None when absent; Element truthiness only reflects child count
+                        f.update({
+                            'initialization_url': segment_list.find('{urn:mpeg:DASH:schema:MPD:2011}Initialization').attrib['sourceURL'],
+                            'segment_urls': [segment.attrib.get('media') for segment in segment_list.findall('{urn:mpeg:DASH:schema:MPD:2011}SegmentURL')]
+                        })
                     try:
                         existing_format = next(
                             fo for fo in formats
@@ -1412,12 +1418,8 @@ class YoutubeChannelIE(InfoExtractor):
             channel_page, 'channel id', default=None)
         if channel_playlist_id and channel_playlist_id.startswith('UC'):
             playlist_id = 'UU' + channel_playlist_id[2:]
-            channel_playlist = unescapeHTML(self._search_regex(
-                r'href="/?(watch\?v=[0-9A-Za-z_-]{11}&list=%s)"' % playlist_id,
-                channel_page, 'channel playlist URL', default=None))
-            if channel_playlist:
-                return self.url_result(
-                    compat_urlparse.urljoin(url, '/%s' % channel_playlist), 'YoutubePlaylist')
+            return self.url_result(
+                compat_urlparse.urljoin(url, '/playlist?list=%s' % playlist_id), 'YoutubePlaylist')
 
         channel_page = self._download_webpage(url, channel_id, 'Downloading page #1')
         autogenerated = re.search(r'''(?x)