X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fvimeo.py;h=ddf375c6c2189f4eea655196a69c7e2339952527;hb=894b3826f5a2e1742010c20554a6a1b9e98a51ee;hp=9aec8a2ab16d29badf85c6804d9d51d3f3127fd5;hpb=067aa17edf5a46a8cbc4d6b90864eddf051fa2bc;p=youtube-dl diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 9aec8a2ab..ddf375c6c 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -2,12 +2,14 @@ from __future__ import unicode_literals import base64 +import functools import json import re import itertools from .common import InfoExtractor from ..compat import ( + compat_kwargs, compat_HTTPError, compat_str, compat_urlparse, @@ -16,10 +18,10 @@ from ..utils import ( determine_ext, ExtractorError, js_to_json, - InAdvancePagedList, int_or_none, merge_dicts, NO_DEFAULT, + OnDemandPagedList, parse_filesize, qualities, RegexNotFoundError, @@ -99,6 +101,13 @@ class VimeoBaseInfoExtractor(InfoExtractor): webpage, 'vuid', group='vuid') return xsrft, vuid + def _extract_vimeo_config(self, webpage, video_id, *args, **kwargs): + vimeo_config = self._search_regex( + r'vimeo\.config\s*=\s*(?:({.+?})|_extend\([^,]+,\s+({.+?})\));', + webpage, 'vimeo config', *args, **compat_kwargs(kwargs)) + if vimeo_config: + return self._parse_json(vimeo_config, video_id) + def _set_vimeo_cookie(self, name, value): self._set_cookie('vimeo.com', name, value) @@ -109,23 +118,9 @@ class VimeoBaseInfoExtractor(InfoExtractor): def _parse_config(self, config, video_id): video_data = config['video'] - # Extract title video_title = video_data['title'] - - # Extract uploader, uploader_url and uploader_id - video_uploader = video_data.get('owner', {}).get('name') - video_uploader_url = video_data.get('owner', {}).get('url') - video_uploader_id = video_uploader_url.split('/')[-1] if video_uploader_url else None - - # Extract video thumbnail - video_thumbnail = video_data.get('thumbnail') - if video_thumbnail is None: - video_thumbs = video_data.get('thumbs') - if video_thumbs and isinstance(video_thumbs, dict): - _, video_thumbnail = sorted((int(width if width.isdigit() else 0), t_url) for (width, t_url) in video_thumbs.items())[-1] - - # Extract video duration - video_duration = int_or_none(video_data.get('duration')) + live_event = video_data.get('live_event') or {} + is_live = live_event.get('status') == 'started' formats = [] config_files = video_data.get('files') or config['request'].get('files', {}) @@ -142,6 +137,7 @@ class VimeoBaseInfoExtractor(InfoExtractor): 'tbr': int_or_none(f.get('bitrate')), }) + # TODO: fix handling of 308 status code returned for live archive manifest requests for files_type in ('hls', 'dash'): for cdn_name, cdn_data in config_files.get(files_type, {}).get('cdns', {}).items(): manifest_url = cdn_data.get('url') @@ -151,7 +147,7 @@ class VimeoBaseInfoExtractor(InfoExtractor): if files_type == 'hls': formats.extend(self._extract_m3u8_formats( manifest_url, video_id, 'mp4', - 'm3u8_native', m3u8_id=format_id, + 'm3u8' if is_live else 'm3u8_native', m3u8_id=format_id, note='Downloading %s m3u8 information' % cdn_name, fatal=False)) elif files_type == 'dash': @@ -164,6 +160,10 @@ class VimeoBaseInfoExtractor(InfoExtractor): else: mpd_manifest_urls = [(format_id, manifest_url)] for f_id, m_url in mpd_manifest_urls: + if 'json=1' in m_url: + real_m_url = (self._download_json(m_url, video_id, fatal=False) or {}).get('url') + if real_m_url: + m_url = real_m_url mpd_formats = self._extract_mpd_formats( m_url.replace('/master.json', '/master.mpd'), video_id, f_id, 'Downloading %s MPD information' % cdn_name, @@ -175,6 +175,15 @@ class VimeoBaseInfoExtractor(InfoExtractor): f['preference'] = -40 formats.extend(mpd_formats) + live_archive = live_event.get('archive') or {} + live_archive_source_url = live_archive.get('source_url') + if live_archive_source_url and live_archive.get('status') == 'done': + formats.append({ + 'format_id': 'live-archive-source', + 'url': live_archive_source_url, + 'preference': 1, + }) + subtitles = {} text_tracks = config['request'].get('text_tracks') if text_tracks: @@ -184,15 +193,33 @@ class VimeoBaseInfoExtractor(InfoExtractor): 'url': 'https://vimeo.com' + tt['url'], }] + thumbnails = [] + if not is_live: + for key, thumb in video_data.get('thumbs', {}).items(): + thumbnails.append({ + 'id': key, + 'width': int_or_none(key), + 'url': thumb, + }) + thumbnail = video_data.get('thumbnail') + if thumbnail: + thumbnails.append({ + 'url': thumbnail, + }) + + owner = video_data.get('owner') or {} + video_uploader_url = owner.get('url') + return { - 'title': video_title, - 'uploader': video_uploader, - 'uploader_id': video_uploader_id, + 'title': self._live_title(video_title) if is_live else video_title, + 'uploader': owner.get('name'), + 'uploader_id': video_uploader_url.split('/')[-1] if video_uploader_url else None, 'uploader_url': video_uploader_url, - 'thumbnail': video_thumbnail, - 'duration': video_duration, + 'thumbnails': thumbnails, + 'duration': int_or_none(video_data.get('duration')), 'formats': formats, 'subtitles': subtitles, + 'is_live': is_live, } def _extract_original_format(self, url, video_id): @@ -236,7 +263,7 @@ class VimeoIE(VimeoBaseInfoExtractor): \. )? vimeo(?Ppro)?\.com/ - (?!(?:channels|album)/[^/?#]+/?(?:$|[?#])|[^/]+/review/|ondemand/) + (?!(?:channels|album|showcase)/[^/?#]+/?(?:$|[?#])|[^/]+/review/|ondemand/) (?:.*?/)? (?: (?: @@ -563,11 +590,9 @@ class VimeoIE(VimeoBaseInfoExtractor): # and latter we extract those that are Vimeo specific. self.report_extraction(video_id) - vimeo_config = self._search_regex( - r'vimeo\.config\s*=\s*(?:({.+?})|_extend\([^,]+,\s+({.+?})\));', webpage, - 'vimeo config', default=None) + vimeo_config = self._extract_vimeo_config(webpage, video_id, default=None) if vimeo_config: - seed_status = self._parse_json(vimeo_config, video_id).get('seed_status', {}) + seed_status = vimeo_config.get('seed_status', {}) if seed_status.get('state') == 'failed': raise ExtractorError( '%s said: %s' % (self.IE_NAME, seed_status['title']), @@ -796,7 +821,8 @@ class VimeoChannelIE(VimeoBaseInfoExtractor): return '%s/videos/page:%d/' % (base_url, pagenum) def _extract_list_title(self, webpage): - return self._TITLE or self._html_search_regex(self._TITLE_RE, webpage, 'list title') + return self._TITLE or self._html_search_regex( + self._TITLE_RE, webpage, 'list title', fatal=False) def _login_list_password(self, page_url, list_id, webpage): login_form = self._search_regex( @@ -887,7 +913,7 @@ class VimeoUserIE(VimeoChannelIE): class VimeoAlbumIE(VimeoChannelIE): IE_NAME = 'vimeo:album' - _VALID_URL = r'https://vimeo\.com/album/(?P\d+)(?:$|[?#]|/(?!video))' + _VALID_URL = r'https://vimeo\.com/(?:album|showcase)/(?P\d+)(?:$|[?#]|/(?!video))' _TITLE_RE = r'