[youtube:playlist] Fix mixes extraction (fixes #2444)
authorJaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>
Sun, 23 Feb 2014 16:17:36 +0000 (17:17 +0100)
committerJaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>
Sun, 23 Feb 2014 16:17:36 +0000 (17:17 +0100)
youtube_dl/extractor/youtube.py

index cf0bd4ae35112aa9e83175e04366b778ef0c39e7..f868b19290a65ec0992ebbf6da80cafc7699f072 100644 (file)
@@ -1488,11 +1488,16 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
         # the id of the playlist is just 'RD' + video_id
         url = 'https://youtube.com/watch?v=%s&list=%s' % (playlist_id[-11:], playlist_id)
         webpage = self._download_webpage(url, playlist_id, u'Downloading Youtube mix')
-        title_span = (get_element_by_attribute('class', 'title long-title', webpage) or
-            get_element_by_attribute('class', 'title ', webpage))
+        search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage)
+        title_span = (search_title('playlist-title') or
+            search_title('title long-title') or search_title('title'))
         title = clean_html(title_span)
-        video_re = r'data-index="\d+".*?href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s' % re.escape(playlist_id)
-        ids = orderedSet(re.findall(video_re, webpage))
+        video_re = r'''(?x)data-index="\d+".*?
+                       data-video-username="(.*?)".*?
+                       href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id)
+        matches = orderedSet(re.findall(video_re, webpage, flags=re.DOTALL))
+        # Some of the videos may have beend deleted, their username field is empty
+        ids = [video_id for (username, video_id) in matches if username]
         url_results = self._ids_to_results(ids)
 
         return self.playlist_result(url_results, playlist_id, title)