X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fvimeo.py;h=3baa2d075543fe951db281e081da8527aeced13c;hb=68217024e83c8e7965f2800e9ff7a9575f049b5c;hp=37e1da70d7724368027d54807a527a656969fdbb;hpb=ec85ded83cbfa652ba94cb080aab52d8b270212a;p=youtube-dl diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 37e1da70d..3baa2d075 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -16,17 +16,18 @@ from ..utils import ( ExtractorError, InAdvancePagedList, int_or_none, + merge_dicts, NO_DEFAULT, RegexNotFoundError, sanitized_Request, smuggle_url, std_headers, - unified_strdate, + try_get, + unified_timestamp, unsmuggle_url, urlencode_postdata, unescapeHTML, parse_filesize, - try_get, ) @@ -36,26 +37,35 @@ class VimeoBaseInfoExtractor(InfoExtractor): _LOGIN_URL = 'https://vimeo.com/log_in' def _login(self): - (username, password) = self._get_login_info() + username, password = self._get_login_info() if username is None: if self._LOGIN_REQUIRED: raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True) return - self.report_login() - webpage = self._download_webpage(self._LOGIN_URL, None, False) + webpage = self._download_webpage( + self._LOGIN_URL, None, 'Downloading login page') token, vuid = self._extract_xsrft_and_vuid(webpage) - data = urlencode_postdata({ + data = { 'action': 'login', 'email': username, 'password': password, 'service': 'vimeo', 'token': token, - }) - login_request = sanitized_Request(self._LOGIN_URL, data) - login_request.add_header('Content-Type', 'application/x-www-form-urlencoded') - login_request.add_header('Referer', self._LOGIN_URL) + } self._set_vimeo_cookie('vuid', vuid) - self._download_webpage(login_request, None, False, 'Wrong login info') + try: + self._download_webpage( + self._LOGIN_URL, None, 'Logging in', + data=urlencode_postdata(data), headers={ + 'Content-Type': 'application/x-www-form-urlencoded', + 'Referer': self._LOGIN_URL, + }) + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 418: + raise ExtractorError( + 'Unable to log in: bad username or password', + expected=True) + raise ExtractorError('Unable to log in') def _verify_video_password(self, url, video_id, webpage): password = self._downloader.params.get('videopassword') @@ -142,10 +152,25 @@ class VimeoBaseInfoExtractor(InfoExtractor): note='Downloading %s m3u8 information' % cdn_name, fatal=False)) elif files_type == 'dash': - formats.extend(self._extract_mpd_formats( - manifest_url.replace('/master.json', '/master.mpd'), video_id, format_id, - 'Downloading %s MPD information' % cdn_name, - fatal=False)) + mpd_pattern = r'/%s/(?:sep/)?video/' % video_id + mpd_manifest_urls = [] + if re.search(mpd_pattern, manifest_url): + for suffix, repl in (('', 'video'), ('_sep', 'sep/video')): + mpd_manifest_urls.append((format_id + suffix, re.sub( + mpd_pattern, '/%s/%s/' % (video_id, repl), manifest_url))) + else: + mpd_manifest_urls = [(format_id, manifest_url)] + for f_id, m_url in mpd_manifest_urls: + mpd_formats = self._extract_mpd_formats( + m_url.replace('/master.json', '/master.mpd'), video_id, f_id, + 'Downloading %s MPD information' % cdn_name, + fatal=False) + for f in mpd_formats: + if f.get('vcodec') == 'none': + f['preference'] = -50 + elif f.get('acodec') == 'none': + f['preference'] = -40 + formats.extend(mpd_formats) subtitles = {} text_tracks = config['request'].get('text_tracks') @@ -203,12 +228,14 @@ class VimeoIE(VimeoBaseInfoExtractor): 'id': '56015672', 'ext': 'mp4', 'title': "youtube-dl test video - \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550", - 'description': 'md5:2d3305bad981a06ff79f027f19865021', + 'description': 'md5:509a9ad5c9bf97c60faee9203aca4479', + 'timestamp': 1355990239, 'upload_date': '20121220', 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/user7108434', 'uploader_id': 'user7108434', 'uploader': 'Filippo Valsorda', 'duration': 10, + 'license': 'by-sa', }, }, { @@ -249,12 +276,13 @@ class VimeoIE(VimeoBaseInfoExtractor): 'id': '68375962', 'ext': 'mp4', 'title': 'youtube-dl password protected test video', + 'timestamp': 1371200155, 'upload_date': '20130614', 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/user18948128', 'uploader_id': 'user18948128', 'uploader': 'Jaime Marquínez Ferrándiz', 'duration': 10, - 'description': 'This is "youtube-dl password protected test video" by on Vimeo, the home for high quality videos and the people who love them.', + 'description': 'md5:dca3ea23adb29ee387127bc4ddfce63f', }, 'params': { 'videopassword': 'youtube-dl', @@ -271,7 +299,8 @@ class VimeoIE(VimeoBaseInfoExtractor): 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/atencio', 'uploader_id': 'atencio', 'uploader': 'Peter Atencio', - 'upload_date': '20130927', + 'timestamp': 1380339469, + 'upload_date': '20130928', 'duration': 187, }, }, @@ -283,6 +312,7 @@ class VimeoIE(VimeoBaseInfoExtractor): 'ext': 'mp4', 'title': 'The New Vimeo Player (You Know, For Videos)', 'description': 'md5:2ec900bf97c3f389378a96aee11260ea', + 'timestamp': 1381846109, 'upload_date': '20131015', 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/staff', 'uploader_id': 'staff', @@ -306,7 +336,7 @@ class VimeoIE(VimeoBaseInfoExtractor): { # contains original format 'url': 'https://vimeo.com/33951933', - 'md5': '2d9f5475e0537f013d0073e812ab89e6', + 'md5': '53c688fa95a55bf4b7293d37a89c5c53', 'info_dict': { 'id': '33951933', 'ext': 'mp4', @@ -314,6 +344,7 @@ class VimeoIE(VimeoBaseInfoExtractor): 'uploader': 'The DMCI', 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/dmci', 'uploader_id': 'dmci', + 'timestamp': 1324343742, 'upload_date': '20111220', 'description': 'md5:ae23671e82d05415868f7ad1aec21147', }, @@ -324,11 +355,12 @@ class VimeoIE(VimeoBaseInfoExtractor): 'url': 'https://vimeo.com/channels/tributes/6213729', 'info_dict': { 'id': '6213729', - 'ext': 'mp4', + 'ext': 'mov', 'title': 'Vimeo Tribute: The Shining', 'uploader': 'Casey Donahue', 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/caseydonahue', 'uploader_id': 'caseydonahue', + 'timestamp': 1250886430, 'upload_date': '20090821', 'description': 'md5:bdbf314014e58713e6e5b66eb252f4a6', }, @@ -338,7 +370,7 @@ class VimeoIE(VimeoBaseInfoExtractor): 'expected_warnings': ['Unable to download JSON metadata'], }, { - # redirects to ondemand extractor and should be passed throught it + # redirects to ondemand extractor and should be passed through it # for successful extraction 'url': 'https://vimeo.com/73445910', 'info_dict': { @@ -390,7 +422,7 @@ class VimeoIE(VimeoBaseInfoExtractor): urls = [] # Look for embedded (iframe) Vimeo player for mobj in re.finditer( - r']+?src=(["\'])(?P(?:https?:)?//player\.vimeo\.com/video/.+?)\1', + r']+?src=(["\'])(?P(?:https?:)?//player\.vimeo\.com/video/\d+.*?)\1', webpage): urls.append(VimeoIE._smuggle_referrer(unescapeHTML(mobj.group('url')), url)) PLAIN_EMBED_RE = ( @@ -446,11 +478,12 @@ class VimeoIE(VimeoBaseInfoExtractor): request = sanitized_Request(url, headers=headers) try: webpage, urlh = self._download_webpage_handle(request, video_id) + redirect_url = compat_str(urlh.geturl()) # Some URLs redirect to ondemand can't be extracted with # this extractor right away thus should be passed through # ondemand extractor (e.g. https://vimeo.com/73445910) - if VimeoOndemandIE.suitable(urlh.geturl()): - return self.url_result(urlh.geturl(), VimeoOndemandIE.ie_key()) + if VimeoOndemandIE.suitable(redirect_url): + return self.url_result(redirect_url, VimeoOndemandIE.ie_key()) except ExtractorError as ee: if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 403: errmsg = ee.cause.read() @@ -477,6 +510,9 @@ class VimeoIE(VimeoBaseInfoExtractor): '%s said: %s' % (self.IE_NAME, seed_status['title']), expected=True) + cc_license = None + timestamp = None + # Extract the config JSON try: try: @@ -490,8 +526,12 @@ class VimeoIE(VimeoBaseInfoExtractor): vimeo_clip_page_config = self._search_regex( r'vimeo\.clip_page_config\s*=\s*({.+?});', webpage, 'vimeo clip page config') - config_url = self._parse_json( - vimeo_clip_page_config, video_id)['player']['config_url'] + page_config = self._parse_json(vimeo_clip_page_config, video_id) + config_url = page_config['player']['config_url'] + cc_license = page_config.get('cc_license') + timestamp = try_get( + page_config, lambda x: x['clip']['uploaded_on'], + compat_str) config_json = self._download_webpage(config_url, video_id) config = json.loads(config_json) except RegexNotFoundError: @@ -512,15 +552,15 @@ class VimeoIE(VimeoBaseInfoExtractor): if re.search(r']+?id="pw_form"', webpage) is not None: if '_video_password_verified' in data: raise ExtractorError('video password verification failed!') - self._verify_video_password(url, video_id, webpage) + self._verify_video_password(redirect_url, video_id, webpage) return self._real_extract( - smuggle_url(url, {'_video_password_verified': 'verified'})) + smuggle_url(redirect_url, {'_video_password_verified': 'verified'})) else: raise ExtractorError('Unable to extract info section', cause=e) else: if config.get('view') == 4: - config = self._verify_player_video_password(url, video_id) + config = self._verify_player_video_password(redirect_url, video_id) def is_rented(): if '>You rented this title.<' in webpage: @@ -560,10 +600,10 @@ class VimeoIE(VimeoBaseInfoExtractor): self._downloader.report_warning('Cannot find video description') # Extract upload date - video_upload_date = None - mobj = re.search(r']+datetime="([^"]+)"', webpage) - if mobj is not None: - video_upload_date = unified_strdate(mobj.group(1)) + if not timestamp: + timestamp = self._search_regex( + r']+datetime="([^"]+)"', webpage, + 'timestamp', default=None) try: view_count = int(self._search_regex(r'UserPlays:(\d+)', webpage, 'view count')) @@ -586,7 +626,10 @@ class VimeoIE(VimeoBaseInfoExtractor): if download_url and not source_file.get('is_cold') and not source_file.get('is_defrosting'): source_name = source_file.get('public_name', 'Original') if self._is_valid_url(download_url, video_id, '%s video' % source_name): - ext = source_file.get('extension', determine_ext(download_url)).lower() + ext = (try_get( + source_file, lambda x: x['extension'], + compat_str) or determine_ext( + download_url, None) or 'mp4').lower() formats.append({ 'url': download_url, 'ext': ext, @@ -597,19 +640,30 @@ class VimeoIE(VimeoBaseInfoExtractor): 'preference': 1, }) - info_dict = self._parse_config(config, video_id) - formats.extend(info_dict['formats']) + info_dict_config = self._parse_config(config, video_id) + formats.extend(info_dict_config['formats']) self._vimeo_sort_formats(formats) - info_dict.update({ + + json_ld = self._search_json_ld(webpage, video_id, default={}) + + if not cc_license: + cc_license = self._search_regex( + r']+rel=["\']license["\'][^>]+href=(["\'])(?P(?:(?!\1).)+)\1', + webpage, 'license', default=None, group='license') + + info_dict = { 'id': video_id, 'formats': formats, - 'upload_date': video_upload_date, + 'timestamp': unified_timestamp(timestamp), 'description': video_description, 'webpage_url': url, 'view_count': view_count, 'like_count': like_count, 'comment_count': comment_count, - }) + 'license': cc_license, + } + + info_dict = merge_dicts(info_dict, info_dict_config, json_ld) return info_dict @@ -629,6 +683,9 @@ class VimeoOndemandIE(VimeoBaseInfoExtractor): 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/gumfilms', 'uploader_id': 'gumfilms', }, + 'params': { + 'format': 'best[protocol=https]', + }, }, { # requires Referer to be passed along with og:video:url 'url': 'https://vimeo.com/ondemand/36938/126682985', @@ -727,12 +784,12 @@ class VimeoChannelIE(VimeoBaseInfoExtractor): # Try extracting href first since not all videos are available via # short https://vimeo.com/id URL (e.g. https://vimeo.com/channels/tributes/6213729) clips = re.findall( - r'id="clip_(\d+)"[^>]*>\s*]+href="(/(?:[^/]+/)*\1)', webpage) + r'id="clip_(\d+)"[^>]*>\s*]+href="(/(?:[^/]+/)*\1)(?:[^>]+\btitle="([^"]+)")?', webpage) if clips: - for video_id, video_url in clips: + for video_id, video_url, video_title in clips: yield self.url_result( compat_urlparse.urljoin(base_url, video_url), - VimeoIE.ie_key(), video_id=video_id) + VimeoIE.ie_key(), video_id=video_id, video_title=video_title) # More relaxed fallback else: for video_id in re.findall(r'id=["\']clip_(\d+)', webpage): @@ -881,10 +938,14 @@ class VimeoReviewIE(VimeoBaseInfoExtractor): def _get_config_url(self, webpage_url, video_id, video_password_verified=False): webpage = self._download_webpage(webpage_url, video_id) - data = self._parse_json(self._search_regex( - r'window\s*=\s*_extend\(window,\s*({.+?})\);', webpage, 'data', - default=NO_DEFAULT if video_password_verified else '{}'), video_id) - config_url = data.get('vimeo_esi', {}).get('config', {}).get('configUrl') + config_url = self._html_search_regex( + r'data-config-url=(["\'])(?P(?:(?!\1).)+)\1', webpage, + 'config URL', default=None, group='url') + if not config_url: + data = self._parse_json(self._search_regex( + r'window\s*=\s*_extend\(window,\s*({.+?})\);', webpage, 'data', + default=NO_DEFAULT if video_password_verified else '{}'), video_id) + config_url = data.get('vimeo_esi', {}).get('config', {}).get('configUrl') if config_url is None: self._verify_video_password(webpage_url, video_id, webpage) config_url = self._get_config_url( @@ -928,10 +989,10 @@ class VimeoWatchLaterIE(VimeoChannelIE): class VimeoLikesIE(InfoExtractor): - _VALID_URL = r'https://(?:www\.)?vimeo\.com/user(?P[0-9]+)/likes/?(?:$|[?#]|sort:)' + _VALID_URL = r'https://(?:www\.)?vimeo\.com/(?P[^/]+)/likes/?(?:$|[?#]|sort:)' IE_NAME = 'vimeo:likes' IE_DESC = 'Vimeo user likes' - _TEST = { + _TESTS = [{ 'url': 'https://vimeo.com/user755559/likes/', 'playlist_mincount': 293, 'info_dict': { @@ -939,7 +1000,10 @@ class VimeoLikesIE(InfoExtractor): 'description': 'See all the videos urza likes', 'title': 'Videos urza likes', }, - } + }, { + 'url': 'https://vimeo.com/stormlapse/likes', + 'only_matching': True, + }] def _real_extract(self, url): user_id = self._match_id(url) @@ -948,7 +1012,7 @@ class VimeoLikesIE(InfoExtractor): self._search_regex( r'''(?x)
  • .*?
  • \s* - ''', webpage, 'page count'), + ''', webpage, 'page count', default=1), 'page count', fatal=True) PAGE_SIZE = 12 title = self._html_search_regex( @@ -956,7 +1020,7 @@ class VimeoLikesIE(InfoExtractor): description = self._html_search_meta('description', webpage) def _get_page(idx): - page_url = 'https://vimeo.com/user%s/likes/page:%d/sort:date' % ( + page_url = 'https://vimeo.com/%s/likes/page:%d/sort:date' % ( user_id, idx + 1) webpage = self._download_webpage( page_url, user_id, @@ -976,7 +1040,7 @@ class VimeoLikesIE(InfoExtractor): return { '_type': 'playlist', - 'id': 'user%s_likes' % user_id, + 'id': '%s_likes' % user_id, 'title': title, 'description': description, 'entries': pl,