Merge branch 'akamai_pv' of https://github.com/remitamine/youtube-dl into remitamine...
[youtube-dl] / youtube_dl / extractor / youtube.py
index 44c1191bd2a2fe5b6dbadf4ee9c0ae1f9b4c869e..44f98d294909a75f44f9c01e3a2ce0e7c66d86b5 100644 (file)
@@ -125,6 +125,12 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
         if login_results is False:
             return False
 
+        error_msg = self._html_search_regex(
+            r'<[^>]+id="errormsg_0_Passwd"[^>]*>([^<]+)<',
+            login_results, 'error message', default=None)
+        if error_msg:
+            raise ExtractorError('Unable to login: %s' % error_msg, expected=True)
+
         if re.search(r'id="errormsg_0_Passwd"', login_results) is not None:
             raise ExtractorError('Please use your account password and a two-factor code instead of an application-specific password.', expected=True)
 
@@ -1818,20 +1824,32 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
     def _extract_mix(self, playlist_id):
         # The mixes are generated from a single video
         # the id of the playlist is just 'RD' + video_id
-        url = 'https://youtube.com/watch?v=%s&list=%s' % (playlist_id[-11:], playlist_id)
-        webpage = self._download_webpage(
-            url, playlist_id, 'Downloading Youtube mix')
+        ids = []
+        last_id = playlist_id[-11:]
+        for n in itertools.count(1):
+            url = 'https://youtube.com/watch?v=%s&list=%s' % (last_id, playlist_id)
+            webpage = self._download_webpage(
+                url, playlist_id, 'Downloading page {0} of Youtube mix'.format(n))
+            new_ids = orderedSet(re.findall(
+                r'''(?xs)data-video-username=".*?".*?
+                           href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id),
+                webpage))
+            # Fetch new pages until all the videos are repeated, it seems that
+            # there are always 51 unique videos.
+            new_ids = [_id for _id in new_ids if _id not in ids]
+            if not new_ids:
+                break
+            ids.extend(new_ids)
+            last_id = ids[-1]
+
+        url_results = self._ids_to_results(ids)
+
         search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage)
         title_span = (
             search_title('playlist-title') or
             search_title('title long-title') or
             search_title('title'))
         title = clean_html(title_span)
-        ids = orderedSet(re.findall(
-            r'''(?xs)data-video-username=".*?".*?
-                       href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id),
-            webpage))
-        url_results = self._ids_to_results(ids)
 
         return self.playlist_result(url_results, playlist_id, title)