- # thumbnail image
- # We try first to get a high quality image:
- m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
- video_webpage, re.DOTALL)
- if m_thumb is not None:
- video_thumbnail = m_thumb.group(1)
- elif 'thumbnail_url' not in video_info:
- self._downloader.report_warning('unable to extract video thumbnail')
- video_thumbnail = None
- else: # don't panic if we can't find it
- video_thumbnail = compat_urllib_parse_unquote_plus(video_info['thumbnail_url'][0])
-
- # upload date
- upload_date = self._html_search_meta(
- 'datePublished', video_webpage, 'upload date', default=None)
- if not upload_date:
- upload_date = self._search_regex(
- [r'(?s)id="eow-date.*?>(.*?)</span>',
- r'id="watch-uploader-info".*?>.*?(?:Published|Uploaded|Streamed live|Started) on (.+?)</strong>'],
- video_webpage, 'upload date', default=None)
- if upload_date:
- upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
- upload_date = unified_strdate(upload_date)
-
- m_cat_container = self._search_regex(
- r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
- video_webpage, 'categories', default=None)
- if m_cat_container:
- category = self._html_search_regex(
- r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
- default=None)
- video_categories = None if category is None else [category]
- else:
- video_categories = None
-
- video_tags = [
- unescapeHTML(m.group('content'))
- for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
-
- def _extract_count(count_name):
- return str_to_int(self._search_regex(
- r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>'
- % re.escape(count_name),
- video_webpage, count_name, default=None))
-
- like_count = _extract_count('like')
- dislike_count = _extract_count('dislike')
-
- # subtitles
- video_subtitles = self.extract_subtitles(video_id, video_webpage)
- automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
-
- if 'length_seconds' not in video_info:
- self._downloader.report_warning('unable to extract video duration')
- video_duration = None
- else:
- video_duration = int(compat_urllib_parse_unquote_plus(video_info['length_seconds'][0]))
-
- # annotations
- video_annotations = None
- if self._downloader.params.get('writeannotations', False):
- video_annotations = self._extract_annotations(video_id)
-
- def _map_to_format_list(urlmap):
- formats = []
- for itag, video_real_url in urlmap.items():
- dct = {
- 'format_id': itag,
- 'url': video_real_url,
- 'player_url': player_url,
- }
- if itag in self._formats:
- dct.update(self._formats[itag])
- formats.append(dct)
- return formats
-
- if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
- self.report_rtmp_download()
- formats = [{
- 'format_id': '_rtmp',
- 'protocol': 'rtmp',
- 'url': video_info['conn'][0],
- 'player_url': player_url,
- }]
- elif len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1:
- encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
- if 'rtmpe%3Dyes' in encoded_url_map:
- raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
- formats = []
- for url_data_str in encoded_url_map.split(','):
- url_data = compat_parse_qs(url_data_str)
- if 'itag' not in url_data or 'url' not in url_data:
- continue
- format_id = url_data['itag'][0]
- url = url_data['url'][0]
-
- if 'sig' in url_data:
- url += '&signature=' + url_data['sig'][0]
- elif 's' in url_data:
- encrypted_sig = url_data['s'][0]
- ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
-
- jsplayer_url_json = self._search_regex(
- ASSETS_RE,
- embed_webpage if age_gate else video_webpage,
- 'JS player URL (1)', default=None)
- if not jsplayer_url_json and not age_gate:
- # We need the embed website after all
- if embed_webpage is None:
- embed_url = proto + '://www.youtube.com/embed/%s' % video_id
- embed_webpage = self._download_webpage(
- embed_url, video_id, 'Downloading embed webpage')
- jsplayer_url_json = self._search_regex(
- ASSETS_RE, embed_webpage, 'JS player URL')