X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fdailymotion.py;h=f54ecc569cbe02714df09cdaf55c72ffd7129895;hb=18b4e04f1c663e0ea695f6501b860f85af9d7ca1;hp=9bf7a28ca83248ac61d3cc64d98058568b98dde6;hpb=36034aecc287f67a9f93fa00f374f45dcb0e2f77;p=youtube-dl

diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py
index 9bf7a28ca..f54ecc569 100644
--- a/youtube_dl/extractor/dailymotion.py
+++ b/youtube_dl/extractor/dailymotion.py
@@ -1,14 +1,39 @@
 import re
 import json
+import socket
 
 from .common import InfoExtractor
+from .subtitles import NoAutoSubtitlesIE
+
 from ..utils import (
+    compat_http_client,
+    compat_urllib_error,
     compat_urllib_request,
+    compat_str,
+    get_element_by_attribute,
+    get_element_by_id,
 
     ExtractorError,
 )
 
-class DailymotionIE(InfoExtractor):
+
+class DailyMotionSubtitlesIE(NoAutoSubtitlesIE):
+
+    def _get_available_subtitles(self, video_id):
+        request = compat_urllib_request.Request('https://api.dailymotion.com/video/%s/subtitles?fields=id,language,url' % video_id)
+        try:
+            sub_list = compat_urllib_request.urlopen(request).read().decode('utf-8')
+        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
+            self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err))
+            return {}
+        info = json.loads(sub_list)
+        if (info['total'] > 0):
+            sub_lang_list = dict((l['language'], l['url']) for l in info['list'])
+            return sub_lang_list
+        self._downloader.report_warning(u'video doesn\'t have subtitles')
+        return {}
+
+class DailymotionIE(DailyMotionSubtitlesIE):
     """Information Extractor for Dailymotion"""
 
     _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^/]+)'
@@ -18,7 +43,7 @@ class DailymotionIE(InfoExtractor):
         u'file': u'x33vw9.mp4',
         u'md5': u'392c4b85a60a90dc4792da41ce3144eb',
         u'info_dict': {
-            u"uploader": u"Alex and Van .",
+            u"uploader": u"Alex and Van .",
             u"title": u"Tutoriel de Youtubeur\"DL DES VIDEO DE YOUTUBE\""
         }
     }
@@ -60,14 +85,27 @@ class DailymotionIE(InfoExtractor):
         for key in ['stream_h264_hd1080_url','stream_h264_hd_url',
                     'stream_h264_hq_url','stream_h264_url',
                     'stream_h264_ld_url']:
-            if info.get(key):#key in info and info[key]:
+            if info.get(key): # key in info and info[key]:
                 max_quality = key
-                self.to_screen(u'Using %s' % key)
+                self.to_screen(u'%s: Using %s' % (video_id, key))
                 break
         else:
             raise ExtractorError(u'Unable to extract video URL')
         video_url = info[max_quality]
 
+        # subtitles
+        video_subtitles = None
+        video_webpage = None
+
+        if self._downloader.params.get('writesubtitles', False) or self._downloader.params.get('allsubtitles', False):
+            video_subtitles = self._extract_subtitles(video_id)
+        elif self._downloader.params.get('writeautomaticsub', False):
+            video_subtitles = self._request_automatic_caption(video_id, video_webpage)
+
+        if self._downloader.params.get('listsubtitles', False):
+            self._list_available_subtitles(video_id)
+            return
+
         return [{
             'id': video_id,
             'url': video_url,
@@ -75,5 +113,34 @@ class DailymotionIE(InfoExtractor):
             'upload_date': video_upload_date,
             'title': self._og_search_title(webpage),
             'ext': video_extension,
+            'subtitles': video_subtitles,
             'thumbnail': info['thumbnail_url']
         }]
+
+
+class DailymotionPlaylistIE(InfoExtractor):
+    _VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P<id>.+?)/'
+    _MORE_PAGES_INDICATOR = r''
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        playlist_id = mobj.group('id')
+        video_ids = []
+
+        for pagenum in itertools.count(1):
+            webpage = self._download_webpage('https://www.dailymotion.com/playlist/%s/%s' % (playlist_id, pagenum),
+                                             playlist_id, u'Downloading page %s' % pagenum)
+
+            playlist_el = get_element_by_attribute(u'class', u'video_list', webpage)
+            video_ids.extend(re.findall(r'data-id="(.+?)" data-ext-id', playlist_el))
+
+            if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None:
+                break
+
+        entries = [self.url_result('http://www.dailymotion.com/video/%s' % video_id, 'Dailymotion')
+                   for video_id in video_ids]
+        return {'_type': 'playlist',
+                'id': playlist_id,
+                'title': get_element_by_id(u'playlist_name', webpage),
+                'entries': entries,
+                }
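
For context, the subtitle lookup added in DailyMotionSubtitlesIE above boils down to a single request against https://api.dailymotion.com/video/<id>/subtitles?fields=id,language,url, whose JSON response carries a 'total' count and a 'list' of entries with 'language' and 'url' fields. The following is a minimal standalone sketch of that lookup, assuming plain Python 3 stdlib (urllib, json) in place of youtube-dl's compat_* wrappers and report_warning helper; the function name list_dailymotion_subtitles is made up for illustration and is not part of the patch.

import json
import socket
import urllib.error
import urllib.request


def list_dailymotion_subtitles(video_id):
    # Same endpoint the extractor queries; stdlib urllib stands in for
    # youtube-dl's compat_urllib_request (an assumption of this sketch).
    url = ('https://api.dailymotion.com/video/%s/subtitles'
           '?fields=id,language,url' % video_id)
    try:
        with urllib.request.urlopen(url, timeout=10) as resp:
            info = json.loads(resp.read().decode('utf-8'))
    except (urllib.error.URLError, socket.error) as err:
        print('unable to download video subtitles: %s' % err)
        return {}
    if info.get('total', 0) > 0:
        # Same mapping the extractor builds: language code -> subtitle URL
        return dict((l['language'], l['url']) for l in info['list'])
    print("video doesn't have subtitles")
    return {}


if __name__ == '__main__':
    # x33vw9 is the video id used in the _TEST block of the diff above
    print(list_dailymotion_subtitles('x33vw9'))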
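
The new DailymotionPlaylistIE pages through https://www.dailymotion.com/playlist/<id>/<pagenum> with itertools.count(1), collects data-id attributes from the video_list element, and stops once _MORE_PAGES_INDICATOR no longer matches. Two caveats: none of the hunks above add an import itertools, and the body of _MORE_PAGES_INDICATOR is empty as rendered here. The sketch below shows the same pagination pattern in isolation; fetch_page and has_next_page are hypothetical stand-ins for the extractor's _download_webpage call and more-pages check, not part of the patch.

import itertools
import re
import urllib.request

VIDEO_ID_RE = re.compile(r'data-id="(.+?)" data-ext-id')  # same pattern as the patch


def fetch_page(playlist_id, pagenum):
    # Hypothetical stand-in for InfoExtractor._download_webpage
    url = 'https://www.dailymotion.com/playlist/%s/%s' % (playlist_id, pagenum)
    with urllib.request.urlopen(url) as resp:
        return resp.read().decode('utf-8')


def has_next_page(webpage):
    # Stand-in for the _MORE_PAGES_INDICATOR check; the marker used here
    # is an assumption, since the patch's pattern is not reproduced above.
    return re.search(r'class="next"', webpage) is not None


def playlist_video_ids(playlist_id):
    video_ids = []
    for pagenum in itertools.count(1):   # page 1, 2, 3, ...
        webpage = fetch_page(playlist_id, pagenum)
        video_ids.extend(VIDEO_ID_RE.findall(webpage))
        if not has_next_page(webpage):   # last page reached
            break
    return video_ids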