X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;ds=sidebyside;f=youtube_dl%2Fextractor%2Fdailymotion.py;h=745971900b23f1cbf44ce286b9c4a4a7e527a3da;hb=bf45295c5387d0d90b97ca34d377cdaa07c71bcb;hp=842d9a25994ef4c7ca4837d919cc66bd6d0a7d51;hpb=f60b9803a473da8e324313d01af91e5676792c77;p=youtube-dl diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py index 842d9a259..745971900 100644 --- a/youtube_dl/extractor/dailymotion.py +++ b/youtube_dl/extractor/dailymotion.py @@ -22,9 +22,11 @@ from ..utils import ( parse_iso8601, sanitized_Request, str_to_int, + try_get, unescapeHTML, + update_url_query, + url_or_none, urlencode_postdata, - try_get, ) @@ -46,7 +48,14 @@ class DailymotionBaseInfoExtractor(InfoExtractor): class DailymotionIE(DailymotionBaseInfoExtractor): - _VALID_URL = r'(?i)https?://(?:(www|touch)\.)?dailymotion\.[a-z]{2,3}/(?:(?:(?:embed|swf|#)/)?video|swf)/(?P[^/?_]+)' + _VALID_URL = r'''(?ix) + https?:// + (?: + (?:(?:www|touch)\.)?dailymotion\.[a-z]{2,3}/(?:(?:(?:embed|swf|\#)/)?video|swf)| + (?:www\.)?lequipe\.fr/video + ) + /(?P[^/?_]+) + ''' IE_NAME = 'dailymotion' _FORMATS = [ @@ -131,14 +140,26 @@ class DailymotionIE(DailymotionBaseInfoExtractor): }, { 'url': 'http://www.dailymotion.com/swf/x3ss1m_funny-magic-trick-barry-and-stuart_fun', 'only_matching': True, + }, { + 'url': 'https://www.lequipe.fr/video/x791mem', + 'only_matching': True, + }, { + 'url': 'https://www.lequipe.fr/video/k7MtHciueyTcrFtFKA2', + 'only_matching': True, }] @staticmethod def _extract_urls(webpage): + urls = [] # Look for embedded Dailymotion player - matches = re.findall( - r'<(?:(?:embed|iframe)[^>]+?src=|input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=)(["\'])(?P(?:https?:)?//(?:www\.)?dailymotion\.com/(?:embed|swf)/video/.+?)\1', webpage) - return list(map(lambda m: unescapeHTML(m[1]), matches)) + # https://developer.dailymotion.com/player#player-parameters + for mobj in re.finditer( + r'<(?:(?:embed|iframe)[^>]+?src=|input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=)(["\'])(?P(?:https?:)?//(?:www\.)?dailymotion\.com/(?:embed|swf)/video/.+?)\1', webpage): + urls.append(unescapeHTML(mobj.group('url'))) + for mobj in re.finditer( + r'(?s)DM\.player\([^,]+,\s*{.*?video[\'"]?\s*:\s*["\']?(?P[0-9a-zA-Z]+).+?}\s*\);', webpage): + urls.append('https://www.dailymotion.com/embed/video/' + mobj.group('id')) + return urls def _real_extract(self, url): video_id = self._match_id(url) @@ -164,23 +185,33 @@ class DailymotionIE(DailymotionBaseInfoExtractor): webpage, 'comment count', default=None)) player_v5 = self._search_regex( - [r'buildPlayer\(({.+?})\);\n', # See https://github.com/rg3/youtube-dl/issues/7826 + [r'buildPlayer\(({.+?})\);\n', # See https://github.com/ytdl-org/youtube-dl/issues/7826 r'playerV5\s*=\s*dmp\.create\([^,]+?,\s*({.+?})\);', r'buildPlayer\(({.+?})\);', r'var\s+config\s*=\s*({.+?});', - # New layout regex (see https://github.com/rg3/youtube-dl/issues/13580) + # New layout regex (see https://github.com/ytdl-org/youtube-dl/issues/13580) r'__PLAYER_CONFIG__\s*=\s*({.+?});'], webpage, 'player v5', default=None) if player_v5: - player = self._parse_json(player_v5, video_id) - metadata = try_get( - player, lambda x: x['metadata'], dict) or self._download_json( - 'http://www.dailymotion.com/player/metadata/video/%s' % video_id, video_id, query={ - 'integration': 'inline', - 'GK_PV5_NEON': '1', - }) - - if metadata.get('error', {}).get('type') == 'password_protected': + player = self._parse_json(player_v5, video_id, fatal=False) or {} + metadata = try_get(player, lambda x: x['metadata'], dict) + if not metadata: + metadata_url = url_or_none(try_get( + player, lambda x: x['context']['metadata_template_url1'])) + if metadata_url: + metadata_url = metadata_url.replace(':videoId', video_id) + else: + metadata_url = update_url_query( + 'https://www.dailymotion.com/player/metadata/video/%s' + % video_id, { + 'embedder': url, + 'integration': 'inline', + 'GK_PV5_NEON': '1', + }) + metadata = self._download_json( + metadata_url, video_id, 'Downloading metadata JSON') + + if try_get(metadata, lambda x: x['error']['type']) == 'password_protected': password = self._downloader.params.get('videopassword') if password: r = int(metadata['id'][1:], 36)