X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fvimeo.py;h=06b0bed41e68401a8667cbabdca0d9796ea8ca3d;hb=9e1a5b845586a0a5431fb72467142046d8571e6f;hp=a9552d1e75d9150f5bd15cda830ab52d0634ea18;hpb=efb7e11988b9bdc38e8d53051f2bca9592a06a46;p=youtube-dl diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index a9552d1e7..06b0bed41 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -7,13 +7,16 @@ import itertools from .common import InfoExtractor from .subtitles import SubtitlesInfoExtractor -from ..utils import ( +from ..compat import ( compat_HTTPError, compat_urllib_parse, compat_urllib_request, - clean_html, - get_element_by_attribute, + compat_urlparse, +) +from ..utils import ( ExtractorError, + InAdvancePagedList, + int_or_none, RegexNotFoundError, std_headers, unsmuggle_url, @@ -29,7 +32,7 @@ class VimeoBaseInfoExtractor(InfoExtractor): (username, password) = self._get_login_info() if username is None: if self._LOGIN_REQUIRED: - raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True) + raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True) return self.report_login() login_url = 'https://vimeo.com/log_in' @@ -53,9 +56,10 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor): # _VALID_URL matches Vimeo URLs _VALID_URL = r'''(?x) - (?P(?:https?:)?//)? + https?:// (?:(?:www|(?Pplayer))\.)? vimeo(?Ppro)?\.com/ + (?!channels/[^/?#]+/?(?:$|[?#])|album/) (?:.*?/)? (?:(?:play_redirect_hls|moogaloop\.swf)\?clip_id=)? (?:videos?/)? @@ -74,45 +78,70 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor): "uploader_id": "user7108434", "uploader": "Filippo Valsorda", "title": "youtube-dl test video - \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550", + "duration": 10, }, }, { 'url': 'http://vimeopro.com/openstreetmapus/state-of-the-map-us-2013/video/68093876', - 'file': '68093876.mp4', 'md5': '3b5ca6aa22b60dfeeadf50b72e44ed82', 'note': 'Vimeo Pro video (#1197)', 'info_dict': { + 'id': '68093876', + 'ext': 'mp4', 'uploader_id': 'openstreetmapus', 'uploader': 'OpenStreetMap US', 'title': 'Andy Allan - Putting the Carto into OpenStreetMap Cartography', + 'description': 'md5:380943ec71b89736ff4bf27183233d09', + 'duration': 1595, }, }, { 'url': 'http://player.vimeo.com/video/54469442', - 'file': '54469442.mp4', 'md5': '619b811a4417aa4abe78dc653becf511', 'note': 'Videos that embed the url in the player page', 'info_dict': { - 'title': 'Kathy Sierra: Building the minimum Badass User, Business of Software', + 'id': '54469442', + 'ext': 'mp4', + 'title': 'Kathy Sierra: Building the minimum Badass User, Business of Software 2012', 'uploader': 'The BLN & Business of Software', 'uploader_id': 'theblnbusinessofsoftware', + 'duration': 3610, + 'description': None, }, }, { 'url': 'http://vimeo.com/68375962', - 'file': '68375962.mp4', 'md5': 'aaf896bdb7ddd6476df50007a0ac0ae7', 'note': 'Video protected with password', 'info_dict': { + 'id': '68375962', + 'ext': 'mp4', 'title': 'youtube-dl password protected test video', 'upload_date': '20130614', 'uploader_id': 'user18948128', 'uploader': 'Jaime Marquínez Ferrándiz', + 'duration': 10, + 'description': 'This is "youtube-dl password protected test video" by Jaime Marquínez Ferrándiz on Vimeo, the home for high quality videos and the people who love them.', }, 'params': { 'videopassword': 'youtube-dl', }, }, + { + 'url': 'http://vimeo.com/channels/keypeele/75629013', + 'md5': '2f86a05afe9d7abc0b9126d229bbe15d', + 'note': 'Video is freely available via original URL ' + 'and protected with password when accessed via http://vimeo.com/75629013', + 'info_dict': { + 'id': '75629013', + 'ext': 'mp4', + 'title': 'Key & Peele: Terrorist Interrogation', + 'description': 'md5:8678b246399b070816b12313e8b4eb5c', + 'uploader_id': 'atencio', + 'uploader': 'Peter Atencio', + 'duration': 187, + }, + }, { 'url': 'http://vimeo.com/76979871', 'md5': '3363dd6ffebe3784d56f4132317fd446', @@ -125,32 +154,38 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor): 'upload_date': '20131015', 'uploader_id': 'staff', 'uploader': 'Vimeo Staff', + 'duration': 62, } }, + { + # from https://www.ouya.tv/game/Pier-Solar-and-the-Great-Architects/ + 'url': 'https://player.vimeo.com/video/98044508', + 'note': 'The js code contains assignments to the same variable as the config', + 'info_dict': { + 'id': '98044508', + 'ext': 'mp4', + 'title': 'Pier Solar OUYA Official Trailer', + 'uploader': 'Tulio Gonçalves', + 'uploader_id': 'user28849593', + }, + }, ] - @classmethod - def suitable(cls, url): - if VimeoChannelIE.suitable(url): - # Otherwise channel urls like http://vimeo.com/channels/31259 would - # match - return False - else: - return super(VimeoIE, cls).suitable(url) - def _verify_video_password(self, url, video_id, webpage): password = self._downloader.params.get('videopassword', None) if password is None: raise ExtractorError('This video is protected by a password, use the --video-password option') token = self._search_regex(r'xsrft: \'(.*?)\'', webpage, 'login token') - data = compat_urllib_parse.urlencode({'password': password, - 'token': token}) + data = compat_urllib_parse.urlencode({ + 'password': password, + 'token': token, + }) # I didn't manage to use the password with https if url.startswith('https'): - pass_url = url.replace('https','http') + pass_url = url.replace('https', 'http') else: pass_url = url - password_request = compat_urllib_request.Request(pass_url+'/password', data) + password_request = compat_urllib_request.Request(pass_url + '/password', data) password_request.add_header('Content-Type', 'application/x-www-form-urlencoded') password_request.add_header('Cookie', 'xsrft=%s' % token) self._download_webpage(password_request, video_id, @@ -179,14 +214,15 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor): if data is not None: headers = headers.copy() headers.update(data) + if 'Referer' not in headers: + headers['Referer'] = url # Extract ID from URL mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') + orig_url = url if mobj.group('pro') or mobj.group('player'): url = 'http://player.vimeo.com/video/' + video_id - else: - url = 'https://vimeo.com/' + video_id # Retrieve video webpage to extract further information request = compat_urllib_request.Request(url, None, headers) @@ -220,11 +256,11 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor): # We try to find out to which variable is assigned the config dic m_variable_name = re.search('(\w)\.video\.id', webpage) if m_variable_name is not None: - config_re = r'%s=({.+?});' % re.escape(m_variable_name.group(1)) + config_re = r'%s=({[^}].+?});' % re.escape(m_variable_name.group(1)) else: config_re = [r' = {config:({.+?}),assets:', r'(?:[abc])=({.+?});'] config = self._search_regex(config_re, webpage, 'info section', - flags=re.DOTALL) + flags=re.DOTALL) config = json.loads(config) except Exception as e: if re.search('The creator of this video has not given you permission to embed it on this domain.', webpage): @@ -252,20 +288,29 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor): if video_thumbnail is None: video_thumbs = config["video"].get("thumbs") if video_thumbs and isinstance(video_thumbs, dict): - _, video_thumbnail = sorted((int(width), t_url) for (width, t_url) in video_thumbs.items())[-1] + _, video_thumbnail = sorted((int(width if width.isdigit() else 0), t_url) for (width, t_url) in video_thumbs.items())[-1] # Extract video description - video_description = None - try: - video_description = get_element_by_attribute("itemprop", "description", webpage) - if video_description: video_description = clean_html(video_description) - except AssertionError as err: - # On some pages like (http://player.vimeo.com/video/54469442) the - # html tags are not closed, python 2.6 cannot handle it - if err.args[0] == 'we should not get here!': - pass - else: - raise + + video_description = self._html_search_regex( + r'(?s)]*>(.*?)', + webpage, 'description', default=None) + if not video_description: + video_description = self._html_search_meta( + 'description', webpage, default=None) + if not video_description and mobj.group('pro'): + orig_webpage = self._download_webpage( + orig_url, video_id, + note='Downloading webpage for description', + fatal=False) + if orig_webpage: + video_description = self._html_search_meta( + 'description', orig_webpage, default=None) + if not video_description and not mobj.group('player'): + self._downloader.report_warning('Cannot find video description') + + # Extract video duration + video_duration = int_or_none(config["video"].get("duration")) # Extract upload date video_upload_date = None @@ -304,7 +349,7 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor): file_info = {} if video_url is None: video_url = "http://player.vimeo.com/play_redirect?clip_id=%s&sig=%s&time=%s&quality=%s&codecs=%s&type=moogaloop_local&embed_location=" \ - %(video_id, sig, timestamp, quality, codec_name.upper()) + % (video_id, sig, timestamp, quality, codec_name.upper()) files[key].append({ 'ext': codec_extension, @@ -338,6 +383,7 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor): 'title': video_title, 'thumbnail': video_thumbnail, 'description': video_description, + 'duration': video_duration, 'formats': formats, 'webpage_url': url, 'view_count': view_count, @@ -349,9 +395,16 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor): class VimeoChannelIE(InfoExtractor): IE_NAME = 'vimeo:channel' - _VALID_URL = r'(?:https?://)?vimeo\.com/channels/(?P[^/]+)/?(\?.*)?$' + _VALID_URL = r'https?://vimeo\.com/channels/(?P[^/?#]+)/?(?:$|[?#])' _MORE_PAGES_INDICATOR = r']+?title="(.*?)"' + _TESTS = [{ + 'url': 'http://vimeo.com/channels/tributes', + 'info_dict': { + 'title': 'Vimeo Tributes', + }, + 'playlist_mincount': 25, + }] def _page_url(self, base_url, pagenum): return '%s/videos/page:%d/' % (base_url, pagenum) @@ -363,7 +416,7 @@ class VimeoChannelIE(InfoExtractor): video_ids = [] for pagenum in itertools.count(1): webpage = self._download_webpage( - self._page_url(base_url, pagenum) ,list_id, + self._page_url(base_url, pagenum), list_id, 'Downloading page %s' % pagenum) video_ids.extend(re.findall(r'id="clip_(\d+?)"', webpage)) if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None: @@ -379,20 +432,21 @@ class VimeoChannelIE(InfoExtractor): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - channel_id = mobj.group('id') + channel_id = mobj.group('id') return self._extract_videos(channel_id, 'http://vimeo.com/channels/%s' % channel_id) class VimeoUserIE(VimeoChannelIE): IE_NAME = 'vimeo:user' - _VALID_URL = r'(?:https?://)?vimeo\.com/(?P[^/]+)(?:/videos|[#?]|$)' + _VALID_URL = r'https?://vimeo\.com/(?![0-9]+(?:$|[?#/]))(?P[^/]+)(?:/videos|[#?]|$)' _TITLE_RE = r']+?class="user">([^<>]+?)' - - @classmethod - def suitable(cls, url): - if VimeoChannelIE.suitable(url) or VimeoIE.suitable(url) or VimeoAlbumIE.suitable(url) or VimeoGroupsIE.suitable(url): - return False - return super(VimeoUserIE, cls).suitable(url) + _TESTS = [{ + 'url': 'http://vimeo.com/nkistudio/videos', + 'info_dict': { + 'title': 'Nki', + }, + 'playlist_mincount': 66, + }] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) @@ -402,8 +456,15 @@ class VimeoUserIE(VimeoChannelIE): class VimeoAlbumIE(VimeoChannelIE): IE_NAME = 'vimeo:album' - _VALID_URL = r'(?:https?://)?vimeo\.com/album/(?P\d+)' + _VALID_URL = r'https?://vimeo\.com/album/(?P\d+)' _TITLE_RE = r'