Merge branch 'master' into subtitles_rework
[youtube-dl] / youtube_dl / extractor / dailymotion.py
index 8fab16005781c54012d0c048979193bd4a5bcb10..f54ecc569cbe02714df09cdaf55c72ffd7129895 100644 (file)
@@ -82,8 +82,8 @@ class DailymotionIE(DailyMotionSubtitlesIE):
 
         # TODO: support choosing qualities
 
-        for key in ['stream_h264_hd1080_url', 'stream_h264_hd_url',
-                    'stream_h264_hq_url', 'stream_h264_url',
+        for key in ['stream_h264_hd1080_url','stream_h264_hd_url',
+                    'stream_h264_hq_url','stream_h264_url',
                     'stream_h264_ld_url']:
             if info.get(key):  # key in info and info[key]:
                 max_quality = key
@@ -116,3 +116,31 @@ class DailymotionIE(DailyMotionSubtitlesIE):
             'subtitles':    video_subtitles,
             'thumbnail': info['thumbnail_url']
         }]
+
+
+class DailymotionPlaylistIE(InfoExtractor):
+    """Information extractor for Dailymotion playlists, walked page by page."""
+    _VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P<id>.+?)/'
+    _MORE_PAGES_INDICATOR = r'<div class="next">.*?<a.*?href="/playlist/.+?".*?>.*?</a>.*?</div>'
+
+    def _real_extract(self, url):
+        """Return a playlist dict holding one url_result entry per scraped video id."""
+        playlist_id = re.match(self._VALID_URL, url).group('id')
+        collected_ids = []
+        for page_number in itertools.count(1):
+            page_url = 'https://www.dailymotion.com/playlist/%s/%s' % (playlist_id, page_number)
+            webpage = self._download_webpage(
+                page_url, playlist_id, u'Downloading page %s' % page_number)
+            # Video ids are scraped from the element whose class is "video_list".
+            listing = get_element_by_attribute(u'class', u'video_list', webpage)
+            collected_ids += re.findall(r'data-id="(.+?)" data-ext-id', listing)
+            # Stop once the page no longer matches the more-pages pattern.
+            if not re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL):
+                break
+
+        entries = []
+        for video_id in collected_ids:
+            entries.append(self.url_result('http://www.dailymotion.com/video/%s' % video_id, 'Dailymotion'))
+        # The title is looked up in the last page that was downloaded.
+        title = get_element_by_id(u'playlist_name', webpage)
+        return {'_type': 'playlist', 'id': playlist_id, 'title': title, 'entries': entries}