X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fvimeo.py;h=c3623fcbe6b01493c5ec2115f4fe5f2d32737e59;hb=4ddba33f78a11e100653771d10038a7d8e90ec9f;hp=293dad3c02e92b62dad48fe63f6fbf248fc14375;hpb=5cc14c2fd74a721be0effd5bc06a76164a9c97a1;p=youtube-dl diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 293dad3c0..c3623fcbe 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -16,11 +16,20 @@ from ..utils import ( unsmuggle_url, ) + class VimeoIE(InfoExtractor): """Information extractor for vimeo.com.""" # _VALID_URL matches Vimeo URLs - _VALID_URL = r'(?Phttps?://)?(?:(?:www|(?Pplayer))\.)?vimeo(?Ppro)?\.com/(?:(?:(?:groups)/[^/]+)|(?:.*?)/)?(?Pplay_redirect_hls\?clip_id=)?(?:videos?/)?(?P[0-9]+)/?(?:[?].*)?(?:#.*)?$' + _VALID_URL = r'''(?x) + (?Phttps?://)? + (?:(?:www|(?Pplayer))\.)? + vimeo(?Ppro)?\.com/ + (?:.*?/)? + (?:(?:play_redirect_hls|moogaloop\.swf)\?clip_id=)? + (?:videos?/)? + (?P[0-9]+) + /?(?:[?&].*)?(?:[#].*)?$''' _NETRC_MACHINE = 'vimeo' IE_NAME = u'vimeo' _TESTS = [ @@ -115,7 +124,7 @@ class VimeoIE(InfoExtractor): def _real_initialize(self): self._login() - def _real_extract(self, url, new_video=True): + def _real_extract(self, url): url, data = unsmuggle_url(url) headers = std_headers if data is not None: @@ -151,8 +160,14 @@ class VimeoIE(InfoExtractor): config = json.loads(config_json) except RegexNotFoundError: # For pro videos or player.vimeo.com urls - config = self._search_regex([r' = {config:({.+?}),assets:', r'(?:c|b)=({.+?});'], - webpage, u'info section', flags=re.DOTALL) + # We try to find out to which variable is assigned the config dic + m_variable_name = re.search('(\w)\.video\.id', webpage) + if m_variable_name is not None: + config_re = r'%s=({.+?});' % re.escape(m_variable_name.group(1)) + else: + config_re = [r' = {config:({.+?}),assets:', r'(?:[abc])=({.+?});'] + config = self._search_regex(config_re, webpage, u'info section', + 
flags=re.DOTALL) config = json.loads(config) except Exception as e: if re.search('The creator of this video has not given you permission to embed it on this domain.', webpage): @@ -267,6 +282,9 @@ class VimeoChannelIE(InfoExtractor): def _page_url(self, base_url, pagenum): return '%s/videos/page:%d/' % (base_url, pagenum) + def _extract_list_title(self, webpage): + return self._html_search_regex(self._TITLE_RE, webpage, u'list title') + def _extract_videos(self, list_id, base_url): video_ids = [] for pagenum in itertools.count(1): @@ -279,11 +297,9 @@ class VimeoChannelIE(InfoExtractor): entries = [self.url_result('http://vimeo.com/%s' % video_id, 'Vimeo') for video_id in video_ids] - list_title = self._html_search_regex(self._TITLE_RE, webpage, - u'list title') return {'_type': 'playlist', 'id': list_id, - 'title': list_title, + 'title': self._extract_list_title(webpage), 'entries': entries, } @@ -300,7 +316,7 @@ class VimeoUserIE(VimeoChannelIE): @classmethod def suitable(cls, url): - if VimeoChannelIE.suitable(url) or VimeoIE.suitable(url) or VimeoAlbumIE.suitable(url): + if VimeoChannelIE.suitable(url) or VimeoIE.suitable(url) or VimeoAlbumIE.suitable(url) or VimeoGroupsIE.suitable(url): return False return super(VimeoUserIE, cls).suitable(url) @@ -322,3 +338,16 @@ class VimeoAlbumIE(VimeoChannelIE): mobj = re.match(self._VALID_URL, url) album_id = mobj.group('id') return self._extract_videos(album_id, 'http://vimeo.com/album/%s' % album_id) + + +class VimeoGroupsIE(VimeoAlbumIE): + IE_NAME = u'vimeo:group' + _VALID_URL = r'(?:https?://)?vimeo\.com/groups/(?P<name>[^/]+)' + + def _extract_list_title(self, webpage): + return self._og_search_title(webpage) + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + name = mobj.group('name') + return self._extract_videos(name, 'http://vimeo.com/groups/%s' % name)