X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fvimeo.py;h=06b0bed41e68401a8667cbabdca0d9796ea8ca3d;hb=9e1a5b845586a0a5431fb72467142046d8571e6f;hp=403d0bb28ebb4e3d3f1d93af46452485dfe44bb6;hpb=457ac58cc72a0b7161a0369a8f282f38ff0f2f93;p=youtube-dl diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 403d0bb28..06b0bed41 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -7,14 +7,14 @@ import itertools from .common import InfoExtractor from .subtitles import SubtitlesInfoExtractor -from ..utils import ( - clean_html, +from ..compat import ( compat_HTTPError, compat_urllib_parse, compat_urllib_request, compat_urlparse, +) +from ..utils import ( ExtractorError, - get_element_by_attribute, InAdvancePagedList, int_or_none, RegexNotFoundError, @@ -56,7 +56,7 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor): # _VALID_URL matches Vimeo URLs _VALID_URL = r'''(?x) - (?P(?:https?:)?//)? + https?:// (?:(?:www|(?Pplayer))\.)? vimeo(?Ppro)?\.com/ (?!channels/[^/?#]+/?(?:$|[?#])|album/) @@ -91,6 +91,7 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor): 'uploader_id': 'openstreetmapus', 'uploader': 'OpenStreetMap US', 'title': 'Andy Allan - Putting the Carto into OpenStreetMap Cartography', + 'description': 'md5:380943ec71b89736ff4bf27183233d09', 'duration': 1595, }, }, @@ -105,6 +106,7 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor): 'uploader': 'The BLN & Business of Software', 'uploader_id': 'theblnbusinessofsoftware', 'duration': 3610, + 'description': None, }, }, { @@ -119,6 +121,7 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor): 'uploader_id': 'user18948128', 'uploader': 'Jaime Marquínez Ferrándiz', 'duration': 10, + 'description': 'This is "youtube-dl password protected test video" by Jaime Marquínez Ferrándiz on Vimeo, the home for high quality videos and the people who love them.', }, 'params': { 'videopassword': 'youtube-dl', @@ -154,6 +157,18 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor): 'duration': 62, } }, + { + # from https://www.ouya.tv/game/Pier-Solar-and-the-Great-Architects/ + 'url': 'https://player.vimeo.com/video/98044508', + 'note': 'The js code contains assignments to the same variable as the config', + 'info_dict': { + 'id': '98044508', + 'ext': 'mp4', + 'title': 'Pier Solar OUYA Official Trailer', + 'uploader': 'Tulio Gonçalves', + 'uploader_id': 'user28849593', + }, + }, ] def _verify_video_password(self, url, video_id, webpage): @@ -205,6 +220,7 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor): # Extract ID from URL mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') + orig_url = url if mobj.group('pro') or mobj.group('player'): url = 'http://player.vimeo.com/video/' + video_id @@ -240,11 +256,11 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor): # We try to find out to which variable is assigned the config dic m_variable_name = re.search('(\w)\.video\.id', webpage) if m_variable_name is not None: - config_re = r'%s=({.+?});' % re.escape(m_variable_name.group(1)) + config_re = r'%s=({[^}].+?});' % re.escape(m_variable_name.group(1)) else: config_re = [r' = {config:({.+?}),assets:', r'(?:[abc])=({.+?});'] config = self._search_regex(config_re, webpage, 'info section', - flags=re.DOTALL) + flags=re.DOTALL) config = json.loads(config) except Exception as e: if re.search('The creator of this video has not given you permission to embed it on this domain.', webpage): @@ -275,18 +291,23 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor): _, video_thumbnail = sorted((int(width if width.isdigit() else 0), t_url) for (width, t_url) in video_thumbs.items())[-1] # Extract video description - video_description = None - try: - video_description = get_element_by_attribute("class", "description_wrapper", webpage) - if video_description: - video_description = clean_html(video_description) - except AssertionError as err: - # On some pages like (http://player.vimeo.com/video/54469442) the - # html tags are not closed, python 2.6 cannot handle it - if err.args[0] == 'we should not get here!': - pass - else: - raise + + video_description = self._html_search_regex( + r'(?s)]*>(.*?)', + webpage, 'description', default=None) + if not video_description: + video_description = self._html_search_meta( + 'description', webpage, default=None) + if not video_description and mobj.group('pro'): + orig_webpage = self._download_webpage( + orig_url, video_id, + note='Downloading webpage for description', + fatal=False) + if orig_webpage: + video_description = self._html_search_meta( + 'description', orig_webpage, default=None) + if not video_description and not mobj.group('player'): + self._downloader.report_warning('Cannot find video description') # Extract video duration video_duration = int_or_none(config["video"].get("duration")) @@ -493,7 +514,7 @@ class VimeoReviewIE(InfoExtractor): 'info_dict': { 'id': '91613211', 'ext': 'mp4', - 'title': 'Death by dogma versus assembling agile - Sander Hoogendoorn', + 'title': 're:(?i)^Death by dogma versus assembling agile . Sander Hoogendoorn', 'uploader': 'DevWeek Events', 'duration': 2773, 'thumbnail': 're:^https?://.*\.jpg$',