X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fvimeo.py;h=55f6cd0d8e1d9e7de2fd49f0e123b23e47496f13;hb=829476b80a86819c79511f60f4fc25f09ab186b7;hp=4b4b472a50da08aeeae9d1f93ff74e1fcc6d56a1;hpb=1060425cbb4beffc2a36aa361b03db60df0e40f8;p=youtube-dl diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 4b4b472a5..55f6cd0d8 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -17,10 +17,39 @@ from ..utils import ( RegexNotFoundError, std_headers, unsmuggle_url, + urlencode_postdata, + int_or_none, ) -class VimeoIE(SubtitlesInfoExtractor): +class VimeoBaseInfoExtractor(InfoExtractor): + _NETRC_MACHINE = 'vimeo' + _LOGIN_REQUIRED = False + + def _login(self): + (username, password) = self._get_login_info() + if username is None: + if self._LOGIN_REQUIRED: + raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True) + return + self.report_login() + login_url = 'https://vimeo.com/log_in' + webpage = self._download_webpage(login_url, None, False) + token = self._search_regex(r'xsrft: \'(.*?)\'', webpage, 'login token') + data = urlencode_postdata({ + 'email': username, + 'password': password, + 'action': 'login', + 'service': 'vimeo', + 'token': token, + }) + login_request = compat_urllib_request.Request(login_url, data) + login_request.add_header('Content-Type', 'application/x-www-form-urlencoded') + login_request.add_header('Cookie', 'xsrft=%s' % token) + self._download_webpage(login_request, None, False, 'Wrong login info') + + +class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor): """Information extractor for vimeo.com.""" # _VALID_URL matches Vimeo URLs @@ -33,7 +62,6 @@ class VimeoIE(SubtitlesInfoExtractor): (?:videos?/)? (?P[0-9]+) /?(?:[?&].*)?(?:[#].*)?$''' - _NETRC_MACHINE = 'vimeo' IE_NAME = 'vimeo' _TESTS = [ { @@ -47,45 +75,67 @@ class VimeoIE(SubtitlesInfoExtractor): "uploader_id": "user7108434", "uploader": "Filippo Valsorda", "title": "youtube-dl test video - \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550", + "duration": 10, }, }, { 'url': 'http://vimeopro.com/openstreetmapus/state-of-the-map-us-2013/video/68093876', - 'file': '68093876.mp4', 'md5': '3b5ca6aa22b60dfeeadf50b72e44ed82', 'note': 'Vimeo Pro video (#1197)', 'info_dict': { + 'id': '68093876', + 'ext': 'mp4', 'uploader_id': 'openstreetmapus', 'uploader': 'OpenStreetMap US', 'title': 'Andy Allan - Putting the Carto into OpenStreetMap Cartography', + 'duration': 1595, }, }, { 'url': 'http://player.vimeo.com/video/54469442', - 'file': '54469442.mp4', 'md5': '619b811a4417aa4abe78dc653becf511', 'note': 'Videos that embed the url in the player page', 'info_dict': { - 'title': 'Kathy Sierra: Building the minimum Badass User, Business of Software', + 'id': '54469442', + 'ext': 'mp4', + 'title': 'Kathy Sierra: Building the minimum Badass User, Business of Software 2012', 'uploader': 'The BLN & Business of Software', 'uploader_id': 'theblnbusinessofsoftware', + 'duration': 3610, }, }, { 'url': 'http://vimeo.com/68375962', - 'file': '68375962.mp4', 'md5': 'aaf896bdb7ddd6476df50007a0ac0ae7', 'note': 'Video protected with password', 'info_dict': { + 'id': '68375962', + 'ext': 'mp4', 'title': 'youtube-dl password protected test video', 'upload_date': '20130614', 'uploader_id': 'user18948128', 'uploader': 'Jaime Marquínez Ferrándiz', + 'duration': 10, }, 'params': { 'videopassword': 'youtube-dl', }, }, + { + 'url': 'http://vimeo.com/channels/keypeele/75629013', + 'md5': '2f86a05afe9d7abc0b9126d229bbe15d', + 'note': 'Video is freely available via original URL ' + 'and protected with password when accessed via http://vimeo.com/75629013', + 'info_dict': { + 'id': '75629013', + 'ext': 'mp4', + 'title': 'Key & Peele: Terrorist Interrogation', + 'description': 'md5:8678b246399b070816b12313e8b4eb5c', + 'uploader_id': 'atencio', + 'uploader': 'Peter Atencio', + 'duration': 187, + }, + }, { 'url': 'http://vimeo.com/76979871', 'md5': '3363dd6ffebe3784d56f4132317fd446', @@ -98,42 +148,48 @@ class VimeoIE(SubtitlesInfoExtractor): 'upload_date': '20131015', 'uploader_id': 'staff', 'uploader': 'Vimeo Staff', + 'duration': 62, } }, + { + 'note': 'video player needs Referer', + 'url': 'http://vimeo.com/user22258446/review/91613211/13f927e053', + 'md5': '6295fdab8f4bf6a002d058b2c6dce276', + 'info_dict': { + 'id': '91613211', + 'ext': 'mp4', + 'title': 'Death by dogma versus assembling agile - Sander Hoogendoorn', + 'uploader': 'DevWeek Events', + 'duration': 2773, + 'thumbnail': 're:^https?://.*\.jpg$', + } + } ] - def _login(self): - (username, password) = self._get_login_info() - if username is None: - return - self.report_login() - login_url = 'https://vimeo.com/log_in' - webpage = self._download_webpage(login_url, None, False) - token = self._search_regex(r'xsrft: \'(.*?)\'', webpage, 'login token') - data = compat_urllib_parse.urlencode({'email': username, - 'password': password, - 'action': 'login', - 'service': 'vimeo', - 'token': token, - }) - login_request = compat_urllib_request.Request(login_url, data) - login_request.add_header('Content-Type', 'application/x-www-form-urlencoded') - login_request.add_header('Cookie', 'xsrft=%s' % token) - self._download_webpage(login_request, None, False, 'Wrong login info') + @classmethod + def suitable(cls, url): + if VimeoChannelIE.suitable(url): + # Otherwise channel urls like http://vimeo.com/channels/31259 would + # match + return False + else: + return super(VimeoIE, cls).suitable(url) def _verify_video_password(self, url, video_id, webpage): password = self._downloader.params.get('videopassword', None) if password is None: raise ExtractorError('This video is protected by a password, use the --video-password option') token = self._search_regex(r'xsrft: \'(.*?)\'', webpage, 'login token') - data = compat_urllib_parse.urlencode({'password': password, - 'token': token}) + data = compat_urllib_parse.urlencode({ + 'password': password, + 'token': token, + }) # I didn't manage to use the password with https if url.startswith('https'): - pass_url = url.replace('https','http') + pass_url = url.replace('https', 'http') else: pass_url = url - password_request = compat_urllib_request.Request(pass_url+'/password', data) + password_request = compat_urllib_request.Request(pass_url + '/password', data) password_request.add_header('Content-Type', 'application/x-www-form-urlencoded') password_request.add_header('Cookie', 'xsrft=%s' % token) self._download_webpage(password_request, video_id, @@ -162,14 +218,14 @@ class VimeoIE(SubtitlesInfoExtractor): if data is not None: headers = headers.copy() headers.update(data) + if 'Referer' not in headers: + headers['Referer'] = url # Extract ID from URL mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') if mobj.group('pro') or mobj.group('player'): url = 'http://player.vimeo.com/video/' + video_id - else: - url = 'https://vimeo.com/' + video_id # Retrieve video webpage to extract further information request = compat_urllib_request.Request(url, None, headers) @@ -235,13 +291,14 @@ class VimeoIE(SubtitlesInfoExtractor): if video_thumbnail is None: video_thumbs = config["video"].get("thumbs") if video_thumbs and isinstance(video_thumbs, dict): - _, video_thumbnail = sorted((int(width), t_url) for (width, t_url) in video_thumbs.items())[-1] + _, video_thumbnail = sorted((int(width if width.isdigit() else 0), t_url) for (width, t_url) in video_thumbs.items())[-1] # Extract video description video_description = None try: - video_description = get_element_by_attribute("itemprop", "description", webpage) - if video_description: video_description = clean_html(video_description) + video_description = get_element_by_attribute("class", "description_wrapper", webpage) + if video_description: + video_description = clean_html(video_description) except AssertionError as err: # On some pages like (http://player.vimeo.com/video/54469442) the # html tags are not closed, python 2.6 cannot handle it @@ -250,6 +307,9 @@ class VimeoIE(SubtitlesInfoExtractor): else: raise + # Extract video duration + video_duration = int_or_none(config["video"].get("duration")) + # Extract upload date video_upload_date = None mobj = re.search(r'[^/]+)' + _VALID_URL = r'(?:https?://)?vimeo\.com/channels/(?P[^/]+)/?(\?.*)?$' _MORE_PAGES_INDICATOR = r']+?title="(.*?)"' @@ -346,7 +407,7 @@ class VimeoChannelIE(InfoExtractor): video_ids = [] for pagenum in itertools.count(1): webpage = self._download_webpage( - self._page_url(base_url, pagenum) ,list_id, + self._page_url(base_url, pagenum), list_id, 'Downloading page %s' % pagenum) video_ids.extend(re.findall(r'id="clip_(\d+?)"', webpage)) if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None: @@ -362,7 +423,7 @@ class VimeoChannelIE(InfoExtractor): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - channel_id = mobj.group('id') + channel_id = mobj.group('id') return self._extract_videos(channel_id, 'http://vimeo.com/channels/%s' % channel_id) @@ -429,3 +490,25 @@ class VimeoReviewIE(InfoExtractor): video_id = mobj.group('id') player_url = 'https://player.vimeo.com/player/' + video_id return self.url_result(player_url, 'Vimeo', video_id) + + +class VimeoWatchLaterIE(VimeoBaseInfoExtractor, VimeoChannelIE): + IE_NAME = 'vimeo:watchlater' + IE_DESC = 'Vimeo watch later list, "vimeowatchlater" keyword (requires authentication)' + _VALID_URL = r'https?://vimeo\.com/home/watchlater|:vimeowatchlater' + _LOGIN_REQUIRED = True + _TITLE_RE = r'href="/home/watchlater".*?>(.*?)<' + + def _real_initialize(self): + self._login() + + def _page_url(self, base_url, pagenum): + url = '%s/page:%d/' % (base_url, pagenum) + request = compat_urllib_request.Request(url) + # Set the header to get a partial html page with the ids, + # the normal page doesn't contain them. + request.add_header('X-Requested-With', 'XMLHttpRequest') + return request + + def _real_extract(self, url): + return self._extract_videos('watchlater', 'https://vimeo.com/home/watchlater')