unified_strdate,
unsmuggle_url,
uppercase_escape,
+ url_or_none,
urlencode_postdata,
)
'url': 'https://invidio.us/watch?v=BaW_jenozKc',
'only_matching': True,
},
+ {
+ # DRM protected
+ 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
+ 'only_matching': True,
+ }
]
def __init__(self, *args, **kwargs):
def _extract_signature_function(self, video_id, player_url, example_sig):
id_m = re.match(
- r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|(?:/[a-z]{2}_[A-Z]{2})?/base)?\.(?P<ext>[a-z]+)$',
+ r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|(?:/[a-z]{2,3}_[A-Z]{2})?/base)?\.(?P<ext>[a-z]+)$',
player_url)
if not id_m:
raise ExtractorError('Cannot identify player %r' % player_url)
(r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*c\s*&&\s*d\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
- r'\bc\s*&&\s*d\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\('),
+ r'\bc\s*&&\s*d\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
+ r'\bc\s*&&\s*d\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
jscode, 'Initial JS player signature function name', group='sig')
jsi = JSInterpreter(jscode)
self._downloader.report_warning(err_msg)
return {}
- def _mark_watched(self, video_id, video_info):
- playback_url = video_info.get('videostats_playback_base_url', [None])[0]
+ def _mark_watched(self, video_id, video_info, player_response):
+ playback_url = url_or_none(try_get(
+ player_response,
+ lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']) or try_get(
+ video_info, lambda x: x['videostats_playback_base_url'][0]))
if not playback_url:
return
parsed_playback_url = compat_urlparse.urlparse(playback_url)
'"token" parameter not in video info for unknown reason',
video_id=video_id)
+ if video_info.get('license_info'):
+ raise ExtractorError('This video is DRM protected.', expected=True)
+
video_details = try_get(
player_response, lambda x: x['videoDetails'], dict) or {}
else:
video_description = ''
- if 'multifeed_metadata_list' in video_info and not smuggled_data.get('force_singlefeed', False):
+ if not smuggled_data.get('force_singlefeed', False):
if not self._downloader.params.get('noplaylist'):
- entries = []
- feed_ids = []
- multifeed_metadata_list = video_info['multifeed_metadata_list'][0]
- for feed in multifeed_metadata_list.split(','):
- # Unquote should take place before split on comma (,) since textual
- # fields may contain comma as well (see
- # https://github.com/rg3/youtube-dl/issues/8536)
- feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
- entries.append({
- '_type': 'url_transparent',
- 'ie_key': 'Youtube',
- 'url': smuggle_url(
- '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
- {'force_singlefeed': True}),
- 'title': '%s (%s)' % (video_title, feed_data['title'][0]),
- })
- feed_ids.append(feed_data['id'][0])
- self.to_screen(
- 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
- % (', '.join(feed_ids), video_id))
- return self.playlist_result(entries, video_id, video_title, video_description)
- self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
+ multifeed_metadata_list = try_get(
+ player_response,
+ lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
+ compat_str) or try_get(
+ video_info, lambda x: x['multifeed_metadata_list'][0], compat_str)
+ if multifeed_metadata_list:
+ entries = []
+ feed_ids = []
+ for feed in multifeed_metadata_list.split(','):
+ # Split on comma (,) must take place before unquoting, since the
+ # unquoted textual fields may contain commas as well (see
+ # https://github.com/rg3/youtube-dl/issues/8536)
+ feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
+ entries.append({
+ '_type': 'url_transparent',
+ 'ie_key': 'Youtube',
+ 'url': smuggle_url(
+ '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
+ {'force_singlefeed': True}),
+ 'title': '%s (%s)' % (video_title, feed_data['title'][0]),
+ })
+ feed_ids.append(feed_data['id'][0])
+ self.to_screen(
+ 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
+ % (', '.join(feed_ids), video_id))
+ return self.playlist_result(entries, video_id, video_title, video_description)
+ else:
+ self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
if view_count is None:
view_count = extract_view_count(video_info)
'height': int_or_none(width_height[1]),
}
q = qualities(['small', 'medium', 'hd720'])
+ streaming_formats = try_get(player_response, lambda x: x['streamingData']['formats'], list)
+ if streaming_formats:
+ for fmt in streaming_formats:
+ itag = str_or_none(fmt.get('itag'))
+ if not itag:
+ continue
+ quality = fmt.get('quality')
+ quality_label = fmt.get('qualityLabel') or quality
+ formats_spec[itag] = {
+ 'asr': int_or_none(fmt.get('audioSampleRate')),
+ 'filesize': int_or_none(fmt.get('contentLength')),
+ 'format_note': quality_label,
+ 'fps': int_or_none(fmt.get('fps')),
+ 'height': int_or_none(fmt.get('height')),
+ 'quality': q(quality),
+ # bitrate for itag 43 is always 2147483647
+ 'tbr': float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000) if itag != '43' else None,
+ 'width': int_or_none(fmt.get('width')),
+ }
formats = []
for url_data_str in encoded_url_map.split(','):
url_data = compat_parse_qs(url_data_str)
else:
player_version = self._search_regex(
[r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js',
- r'(?:www|player)-([^/]+)(?:/[a-z]{2}_[A-Z]{2})?/base\.js'],
+ r'(?:www|player(?:_ias)?)-([^/]+)(?:/[a-z]{2,3}_[A-Z]{2})?/base\.js'],
player_url,
'html5 player', fatal=False)
player_desc = 'html5 player %s' % player_version
filesize = int_or_none(url_data.get(
'clen', [None])[0]) or _extract_filesize(url)
- quality = url_data.get('quality_label', [None])[0] or url_data.get('quality', [None])[0]
+ quality = url_data.get('quality', [None])[0]
more_fields = {
'filesize': filesize,
'width': width,
'height': height,
'fps': int_or_none(url_data.get('fps', [None])[0]),
- 'format_note': quality,
+ 'format_note': url_data.get('quality_label', [None])[0] or quality,
'quality': q(quality),
}
for key, value in more_fields.items():
r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
video_webpage)
if m_episode:
- series = m_episode.group('series')
+ series = unescapeHTML(m_episode.group('series'))
season_number = int(m_episode.group('season'))
episode_number = int(m_episode.group('episode'))
else:
self._sort_formats(formats)
- self.mark_watched(video_id, video_info)
+ self.mark_watched(video_id, video_info, player_response)
return {
'id': video_id,