X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fvimeo.py;h=ca716c8f530326907986ceb17a427c0b28c1102e;hb=8b8a39e279b02926197230024e64a50d7995b95d;hp=97590d220db41e02cab6be7755cebc548ace70d2;hpb=35a3ff1d337edd527c73db133d87ed23ca4469f5;p=youtube-dl diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 97590d220..ca716c8f5 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -8,11 +8,11 @@ import itertools from .common import InfoExtractor from ..compat import ( compat_HTTPError, - compat_urllib_parse, compat_urllib_request, compat_urlparse, ) from ..utils import ( + encode_dict, ExtractorError, InAdvancePagedList, int_or_none, @@ -39,27 +39,28 @@ class VimeoBaseInfoExtractor(InfoExtractor): return self.report_login() webpage = self._download_webpage(self._LOGIN_URL, None, False) - token = self._extract_xsrft(webpage) - vuid = self._search_regex( - r'["\']vuid["\']\s*:\s*(["\'])(?P.+?)\1', - webpage, 'vuid', group='vuid') - data = urlencode_postdata({ + token, vuid = self._extract_xsrft_and_vuid(webpage) + data = urlencode_postdata(encode_dict({ 'action': 'login', 'email': username, 'password': password, 'service': 'vimeo', 'token': token, - }) + })) login_request = compat_urllib_request.Request(self._LOGIN_URL, data) login_request.add_header('Content-Type', 'application/x-www-form-urlencoded') login_request.add_header('Cookie', 'vuid=%s' % vuid) login_request.add_header('Referer', self._LOGIN_URL) self._download_webpage(login_request, None, False, 'Wrong login info') - def _extract_xsrft(self, webpage): - return self._search_regex( + def _extract_xsrft_and_vuid(self, webpage): + xsrft = self._search_regex( r'xsrft\s*[=:]\s*(?P["\'])(?P.+?)(?P=q)', webpage, 'login token', group='xsrft') + vuid = self._search_regex( + r'["\']vuid["\']\s*:\s*(["\'])(?P.+?)\1', + webpage, 'vuid', group='vuid') + return xsrft, vuid class VimeoIE(VimeoBaseInfoExtractor): @@ -84,12 +85,12 @@ class VimeoIE(VimeoBaseInfoExtractor): 'info_dict': { 'id': '56015672', 'ext': 'mp4', - "upload_date": "20121220", - "description": "This is a test case for youtube-dl.\nFor more information, see github.com/rg3/youtube-dl\nTest chars: \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550", - "uploader_id": "user7108434", - "uploader": "Filippo Valsorda", - "title": "youtube-dl test video - \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550", - "duration": 10, + 'title': "youtube-dl test video - \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550", + 'description': 'md5:2d3305bad981a06ff79f027f19865021', + 'upload_date': '20121220', + 'uploader_id': 'user7108434', + 'uploader': 'Filippo Valsorda', + 'duration': 10, }, }, { @@ -102,7 +103,7 @@ class VimeoIE(VimeoBaseInfoExtractor): 'uploader_id': 'openstreetmapus', 'uploader': 'OpenStreetMap US', 'title': 'Andy Allan - Putting the Carto into OpenStreetMap Cartography', - 'description': 'md5:380943ec71b89736ff4bf27183233d09', + 'description': 'md5:fd69a7b8d8c34a4e1d2ec2e4afd6ec30', 'duration': 1595, }, }, @@ -132,7 +133,7 @@ class VimeoIE(VimeoBaseInfoExtractor): 'uploader_id': 'user18948128', 'uploader': 'Jaime Marquínez Ferrándiz', 'duration': 10, - 'description': 'This is "youtube-dl password protected test video" by Jaime Marquínez Ferrándiz on Vimeo, the home for high quality videos and the people who love them.', + 'description': 'This is "youtube-dl password protected test video" by Jaime Marquínez Ferrándiz on Vimeo, the home for high quality videos and the people\u2026', }, 'params': { 'videopassword': 'youtube-dl', @@ -156,7 +157,6 @@ class VimeoIE(VimeoBaseInfoExtractor): }, { 'url': 'http://vimeo.com/76979871', - 'md5': '3363dd6ffebe3784d56f4132317fd446', 'note': 'Video with subtitles', 'info_dict': { 'id': '76979871', @@ -181,6 +181,11 @@ class VimeoIE(VimeoBaseInfoExtractor): 'uploader_id': 'user28849593', }, }, + { + 'url': 'https://vimeo.com/109815029', + 'note': 'Video not completely processed, "failed" seed status', + 'only_matching': True, + }, ] @staticmethod @@ -202,16 +207,17 @@ class VimeoIE(VimeoBaseInfoExtractor): password = self._downloader.params.get('videopassword', None) if password is None: raise ExtractorError('This video is protected by a password, use the --video-password option', expected=True) - token = self._extract_xsrft(webpage) - data = urlencode_postdata({ + token, vuid = self._extract_xsrft_and_vuid(webpage) + data = urlencode_postdata(encode_dict({ 'password': password, 'token': token, - }) + })) if url.startswith('http://'): # vimeo only supports https now, but the user can give an http url url = url.replace('http://', 'https://') password_request = compat_urllib_request.Request(url + '/password', data) password_request.add_header('Content-Type', 'application/x-www-form-urlencoded') + password_request.add_header('Cookie', 'clip_test2=1; vuid=%s' % vuid) password_request.add_header('Referer', url) return self._download_webpage( password_request, video_id, @@ -221,7 +227,7 @@ class VimeoIE(VimeoBaseInfoExtractor): password = self._downloader.params.get('videopassword', None) if password is None: raise ExtractorError('This video is protected by a password, use the --video-password option') - data = compat_urllib_parse.urlencode({'password': password}) + data = urlencode_postdata(encode_dict({'password': password})) pass_url = url + '/check-password' password_request = compat_urllib_request.Request(pass_url, data) password_request.add_header('Content-Type', 'application/x-www-form-urlencoded') @@ -272,20 +278,30 @@ class VimeoIE(VimeoBaseInfoExtractor): self.report_extraction(video_id) vimeo_config = self._search_regex( - r'vimeo\.config\s*=\s*({.+?});', webpage, + r'vimeo\.config\s*=\s*(?:({.+?})|_extend\([^,]+,\s+({.+?})\));', webpage, 'vimeo config', default=None) if vimeo_config: seed_status = self._parse_json(vimeo_config, video_id).get('seed_status', {}) if seed_status.get('state') == 'failed': raise ExtractorError( - '%s returned error: %s' % (self.IE_NAME, seed_status['title']), + '%s said: %s' % (self.IE_NAME, seed_status['title']), expected=True) # Extract the config JSON try: try: config_url = self._html_search_regex( - r' data-config-url="(.+?)"', webpage, 'config URL') + r' data-config-url="(.+?)"', webpage, + 'config URL', default=None) + if not config_url: + # Sometimes new react-based page is served instead of old one that require + # different config URL extraction approach (see + # https://github.com/rg3/youtube-dl/pull/7209) + vimeo_clip_page_config = self._search_regex( + r'vimeo\.clip_page_config\s*=\s*({.+?});', webpage, + 'vimeo clip page config') + config_url = self._parse_json( + vimeo_clip_page_config, video_id)['player']['config_url'] config_json = self._download_webpage(config_url, video_id) config = json.loads(config_json) except RegexNotFoundError: @@ -402,7 +418,10 @@ class VimeoIE(VimeoBaseInfoExtractor): formats = [] m3u8_url = config_files.get('hls', {}).get('all') if m3u8_url: - formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', 'm3u8_native', 0, 'hls', fatal=False) + m3u8_formats = self._extract_m3u8_formats( + m3u8_url, video_id, 'mp4', 'm3u8_native', 0, 'hls', fatal=False) + if m3u8_formats: + formats.extend(m3u8_formats) for key in ('other', 'sd', 'hd'): formats += files[key] self._sort_formats(formats) @@ -466,15 +485,16 @@ class VimeoChannelIE(VimeoBaseInfoExtractor): if password is None: raise ExtractorError('This album is protected by a password, use the --video-password option', expected=True) fields = self._hidden_inputs(login_form) - token = self._extract_xsrft(webpage) + token, vuid = self._extract_xsrft_and_vuid(webpage) fields['token'] = token fields['password'] = password - post = urlencode_postdata(fields) + post = urlencode_postdata(encode_dict(fields)) password_path = self._search_regex( r'action="([^"]+)"', login_form, 'password URL') password_url = compat_urlparse.urljoin(page_url, password_path) password_request = compat_urllib_request.Request(password_url, post) password_request.add_header('Content-type', 'application/x-www-form-urlencoded') + password_request.add_header('Cookie', 'vuid=%s' % vuid) self._set_cookie('vimeo.com', 'xsrft', token) return self._download_webpage(