X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fvimeo.py;h=4e8fef1655cf553e602d0719a2e42f61e9d77ac2;hb=bbafbe20c233d00e86fc87a1b1ccab8cf9e88232;hp=d465bf20b6d65b7b5cd3a0545af676c2c710a07d;hpb=231516b6c983561cdfb7d58a07bb78a5fa132e10;p=youtube-dl diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index d465bf20b..4e8fef165 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -16,11 +16,20 @@ from ..utils import ( unsmuggle_url, ) + class VimeoIE(InfoExtractor): """Information extractor for vimeo.com.""" # _VALID_URL matches Vimeo URLs - _VALID_URL = r'(?Phttps?://)?(?:(?:www|(?Pplayer))\.)?vimeo(?Ppro)?\.com/(?:(?:(?:groups|album)/[^/]+)|(?:.*?)/)?(?Pplay_redirect_hls\?clip_id=)?(?:videos?/)?(?P[0-9]+)/?(?:[?].*)?(?:#.*)?$' + _VALID_URL = r'''(?x) + (?Phttps?://)? + (?:(?:www|(?Pplayer))\.)? + vimeo(?Ppro)?\.com/ + (?:.*?/)? + (?Pplay_redirect_hls\?clip_id=)? + (?:videos?/)? + (?P[0-9]+) + /?(?:[?].*)?(?:[#].*)?$''' _NETRC_MACHINE = 'vimeo' IE_NAME = u'vimeo' _TESTS = [ @@ -115,7 +124,7 @@ class VimeoIE(InfoExtractor): def _real_initialize(self): self._login() - def _real_extract(self, url, new_video=True): + def _real_extract(self, url): url, data = unsmuggle_url(url) headers = std_headers if data is not None: @@ -151,8 +160,14 @@ class VimeoIE(InfoExtractor): config = json.loads(config_json) except RegexNotFoundError: # For pro videos or player.vimeo.com urls - config = self._search_regex([r' = {config:({.+?}),assets:', r'c=({.+?);'], - webpage, u'info section', flags=re.DOTALL) + # We try to find out to which variable is assigned the config dic + m_variable_name = re.search('(\w)\.video\.id', webpage) + if m_variable_name is not None: + config_re = r'%s=({.+?});' % re.escape(m_variable_name.group(1)) + else: + config_re = [r' = {config:({.+?}),assets:', r'(?:[abc])=({.+?});'] + config = self._search_regex(config_re, webpage, u'info section', + flags=re.DOTALL) config = json.loads(config) except Exception as e: if re.search('The creator of this video has not given you permission to embed it on this domain.', webpage): @@ -196,6 +211,16 @@ class VimeoIE(InfoExtractor): if mobj is not None: video_upload_date = mobj.group(1) + mobj.group(2) + mobj.group(3) + try: + view_count = int(self._search_regex(r'UserPlays:(\d+)', webpage, u'view count')) + like_count = int(self._search_regex(r'UserLikes:(\d+)', webpage, u'like count')) + comment_count = int(self._search_regex(r'UserComments:(\d+)', webpage, u'comment count')) + except RegexNotFoundError: + # This info is only available in vimeo.com/{id} urls + view_count = None + like_count = None + comment_count = None + # Vimeo specific: extract request signature and timestamp sig = config['request']['signature'] timestamp = config['request']['timestamp'] @@ -242,6 +267,9 @@ class VimeoIE(InfoExtractor): 'description': video_description, 'formats': formats, 'webpage_url': url, + 'view_count': view_count, + 'like_count': like_count, + 'comment_count': comment_count, } @@ -249,25 +277,77 @@ class VimeoChannelIE(InfoExtractor): IE_NAME = u'vimeo:channel' _VALID_URL = r'(?:https?://)?vimeo.\com/channels/(?P[^/]+)' _MORE_PAGES_INDICATOR = r']+?title="(.*?)"' - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - channel_id = mobj.group('id') - video_ids = [] + def _page_url(self, base_url, pagenum): + return '%s/videos/page:%d/' % (base_url, pagenum) + def _extract_list_title(self, webpage): + return self._html_search_regex(self._TITLE_RE, webpage, u'list title') + + def _extract_videos(self, list_id, base_url): + video_ids = [] for pagenum in itertools.count(1): - webpage = self._download_webpage('http://vimeo.com/channels/%s/videos/page:%d' % (channel_id, pagenum), - channel_id, u'Downloading page %s' % pagenum) + webpage = self._download_webpage( + self._page_url(base_url, pagenum) ,list_id, + u'Downloading page %s' % pagenum) video_ids.extend(re.findall(r'id="clip_(\d+?)"', webpage)) if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None: break entries = [self.url_result('http://vimeo.com/%s' % video_id, 'Vimeo') for video_id in video_ids] - channel_title = self._html_search_regex(r'(.*?)' % channel_id, - webpage, u'channel title') return {'_type': 'playlist', - 'id': channel_id, - 'title': channel_title, + 'id': list_id, + 'title': self._extract_list_title(webpage), 'entries': entries, } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + channel_id = mobj.group('id') + return self._extract_videos(channel_id, 'http://vimeo.com/channels/%s' % channel_id) + + +class VimeoUserIE(VimeoChannelIE): + IE_NAME = u'vimeo:user' + _VALID_URL = r'(?:https?://)?vimeo.\com/(?P[^/]+)' + _TITLE_RE = r']+?class="user">([^<>]+?)' + + @classmethod + def suitable(cls, url): + if VimeoChannelIE.suitable(url) or VimeoIE.suitable(url) or VimeoAlbumIE.suitable(url) or VimeoGroupsIE.suitable(url): + return False + return super(VimeoUserIE, cls).suitable(url) + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + name = mobj.group('name') + return self._extract_videos(name, 'http://vimeo.com/%s' % name) + + +class VimeoAlbumIE(VimeoChannelIE): + IE_NAME = u'vimeo:album' + _VALID_URL = r'(?:https?://)?vimeo.\com/album/(?P\d+)' + _TITLE_RE = r'