3 from __future__ import unicode_literals
14 from .common import InfoExtractor, SearchInfoExtractor
15 from ..jsinterp import JSInterpreter
16 from ..swfinterp import SWFInterpreter
17 from ..compat import (
22 compat_urllib_parse_unquote,
23 compat_urllib_parse_unquote_plus,
24 compat_urllib_parse_urlencode,
25 compat_urllib_parse_urlparse,
37 get_element_by_attribute,
59 class YoutubeBaseInfoExtractor(InfoExtractor):
60 """Provide base functions for Youtube extractors"""
61 _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
62 _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'
64 _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
65 _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
66 _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'
68 _NETRC_MACHINE = 'youtube'
69 # If True it will raise an error if no login info is provided
70 _LOGIN_REQUIRED = False
72 _PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}'
74 def _set_language(self):
76 '.youtube.com', 'PREF', 'f1=50000000&hl=en',
77 # YouTube sets the expire time to about two months
78 expire_time=time.time() + 2 * 30 * 24 * 3600)
80 def _ids_to_results(self, ids):
82 self.url_result(vid_id, 'Youtube', video_id=vid_id)
87 Attempt to log in to YouTube.
88 True is returned if successful or skipped.
89 False is returned if login failed.
91 If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
93 username, password = self._get_login_info()
94 # No authentication to be performed
96 if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
97 raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
100 login_page = self._download_webpage(
101 self._LOGIN_URL, None,
102 note='Downloading login page',
103 errnote='unable to fetch login page', fatal=False)
104 if login_page is False:
107 login_form = self._hidden_inputs(login_page)
109 def req(url, f_req, note, errnote):
110 data = login_form.copy()
113 'checkConnection': 'youtube',
114 'checkedDomains': 'youtube',
116 'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
117 'f.req': json.dumps(f_req),
118 'flowName': 'GlifWebSignIn',
119 'flowEntry': 'ServiceLogin',
120 # TODO: reverse actual botguard identifier generation algo
121 'bgRequest': '["identifier",""]',
123 return self._download_json(
124 url, None, note=note, errnote=errnote,
125 transform_source=lambda s: re.sub(r'^[^[]*', '', s),
127 data=urlencode_postdata(data), headers={
128 'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
129 'Google-Accounts-XSRF': 1,
133 self._downloader.report_warning(message)
137 None, [], None, 'US', None, None, 2, False, True,
141 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
143 1, [None, None, []], None, None, None, True
148 lookup_results = req(
149 self._LOOKUP_URL, lookup_req,
150 'Looking up account info', 'Unable to look up account info')
152 if lookup_results is False:
155 user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
157 warn('Unable to extract user hash')
162 None, 1, None, [1, None, None, None, [password, None, True]],
164 None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
165 1, [None, None, []], None, None, None, True
168 challenge_results = req(
169 self._CHALLENGE_URL, challenge_req,
170 'Logging in', 'Unable to log in')
172 if challenge_results is False:
175 login_res = try_get(challenge_results, lambda x: x[0][5], list)
177 login_msg = try_get(login_res, lambda x: x[5], compat_str)
179 'Unable to login: %s' % 'Invalid password'
180 if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
183 res = try_get(challenge_results, lambda x: x[0][-1], list)
185 warn('Unable to extract result entry')
188 login_challenge = try_get(res, lambda x: x[0][0], list)
190 challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
191 if challenge_str == 'TWO_STEP_VERIFICATION':
192 # SEND_SUCCESS - TFA code has been successfully sent to phone
193 # QUOTA_EXCEEDED - reached the limit of TFA codes
194 status = try_get(login_challenge, lambda x: x[5], compat_str)
195 if status == 'QUOTA_EXCEEDED':
196 warn('Exceeded the limit of TFA codes, try later')
199 tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
201 warn('Unable to extract TL')
204 tfa_code = self._get_tfa_info('2-step verification code')
208 'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
209 '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
212 tfa_code = remove_start(tfa_code, 'G-')
215 user_hash, None, 2, None,
217 9, None, None, None, None, None, None, None,
218 [None, tfa_code, True, 2]
222 self._TFA_URL.format(tl), tfa_req,
223 'Submitting TFA code', 'Unable to submit TFA code')
225 if tfa_results is False:
228 tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
230 tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
232 'Unable to finish TFA: %s' % 'Invalid TFA code'
233 if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
236 check_cookie_url = try_get(
237 tfa_results, lambda x: x[0][-1][2], compat_str)
240 'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
241 'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
242 'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
244 challenge = CHALLENGES.get(
246 '%s returned error %s.' % (self.IE_NAME, challenge_str))
247 warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
250 check_cookie_url = try_get(res, lambda x: x[2], compat_str)
252 if not check_cookie_url:
253 warn('Unable to extract CheckCookie URL')
256 check_cookie_results = self._download_webpage(
257 check_cookie_url, None, 'Checking cookie', fatal=False)
259 if check_cookie_results is False:
262 if 'https://myaccount.google.com/' not in check_cookie_results:
263 warn('Unable to log in')
268 def _download_webpage_handle(self, *args, **kwargs):
269 query = kwargs.get('query', {}).copy()
270 query['disable_polymer'] = 'true'
271 kwargs['query'] = query
272 return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
273 *args, **compat_kwargs(kwargs))
275 def _real_initialize(self):
276 if self._downloader is None:
279 if not self._login():
283 class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
284 # Extract entries from page with "Load more" button
285 def _entries(self, page, playlist_id):
286 more_widget_html = content_html = page
287 for page_num in itertools.count(1):
288 for entry in self._process_page(content_html):
291 mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
297 while count <= retries:
299 # Downloading page may result in intermittent 5xx HTTP error
300 # that is usually worked around with a retry
301 more = self._download_json(
302 'https://youtube.com/%s' % mobj.group('more'), playlist_id,
303 'Downloading page #%s%s'
304 % (page_num, ' (retry #%d)' % count if count else ''),
305 transform_source=uppercase_escape)
307 except ExtractorError as e:
308 if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503):
314 content_html = more['content_html']
315 if not content_html.strip():
316 # Some webpages show a "Load more" button but they don't
319 more_widget_html = more['load_more_widget_html']
322 class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
323 def _process_page(self, content):
324 for video_id, video_title in self.extract_videos_from_page(content):
325 yield self.url_result(video_id, 'Youtube', video_id, video_title)
327 def extract_videos_from_page_impl(self, video_re, page, ids_in_page, titles_in_page):
328 for mobj in re.finditer(video_re, page):
329 # The link with index 0 is not the first video of the playlist (not sure if still actual)
330 if 'index' in mobj.groupdict() and mobj.group('id') == '0':
332 video_id = mobj.group('id')
333 video_title = unescapeHTML(
334 mobj.group('title')) if 'title' in mobj.groupdict() else None
336 video_title = video_title.strip()
337 if video_title == '► Play all':
340 idx = ids_in_page.index(video_id)
341 if video_title and not titles_in_page[idx]:
342 titles_in_page[idx] = video_title
344 ids_in_page.append(video_id)
345 titles_in_page.append(video_title)
347 def extract_videos_from_page(self, page):
350 self.extract_videos_from_page_impl(
351 self._VIDEO_RE, page, ids_in_page, titles_in_page)
352 return zip(ids_in_page, titles_in_page)
355 class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
356 def _process_page(self, content):
357 for playlist_id in orderedSet(re.findall(
358 r'<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*><a[^>]+href="/?playlist\?list=([0-9A-Za-z-_]{10,})"',
360 yield self.url_result(
361 'https://www.youtube.com/playlist?list=%s' % playlist_id, 'YoutubePlaylist')
363 def _real_extract(self, url):
364 playlist_id = self._match_id(url)
365 webpage = self._download_webpage(url, playlist_id)
366 title = self._og_search_title(webpage, fatal=False)
367 return self.playlist_result(self._entries(webpage, playlist_id), playlist_id, title)
370 class YoutubeIE(YoutubeBaseInfoExtractor):
371 IE_DESC = 'YouTube.com'
372 _VALID_URL = r"""(?x)^
374 (?:https?://|//) # http(s):// or protocol-independent URL
375 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com/|
376 (?:www\.)?deturl\.com/www\.youtube\.com/|
377 (?:www\.)?pwnyoutube\.com/|
378 (?:www\.)?hooktube\.com/|
379 (?:www\.)?yourepeat\.com/|
380 tube\.majestyc\.net/|
381 # Invidious instances taken from https://github.com/omarroth/invidious/wiki/Invidious-Instances
382 (?:(?:www|dev)\.)?invidio\.us/|
383 (?:(?:www|no)\.)?invidiou\.sh/|
384 (?:(?:www|fi|de)\.)?invidious\.snopyta\.org/|
385 (?:www\.)?invidious\.kabi\.tk/|
386 (?:www\.)?invidious\.13ad\.de/|
387 (?:www\.)?invidious\.mastodon\.host/|
388 (?:www\.)?invidious\.nixnet\.xyz/|
389 (?:www\.)?invidious\.drycat\.fr/|
390 (?:www\.)?tube\.poal\.co/|
391 (?:www\.)?vid\.wxzm\.sx/|
392 (?:www\.)?yt\.elukerio\.org/|
393 (?:www\.)?yt\.lelux\.fi/|
394 (?:www\.)?kgg2m7yk5aybusll\.onion/|
395 (?:www\.)?qklhadlycap4cnod\.onion/|
396 (?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion/|
397 (?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion/|
398 (?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion/|
399 (?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion/|
400 (?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p/|
401 youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
402 (?:.*?\#/)? # handle anchor (#/) redirect urls
403 (?: # the various things that can precede the ID:
404 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
405 |(?: # or the v= param in all its forms
406 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
407 (?:\?|\#!?) # the params delimiter ? or # or #!
408 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&v=V36LpHqtcDY)
413 youtu\.be| # just youtu.be/xxxx
414 vid\.plus| # or vid.plus/xxxx
415 zwearz\.com/watch| # or zwearz.com/watch/xxxx
417 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
419 )? # all until now is optional -> you can pass the naked ID
420 ([0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
423 %(playlist_id)s| # combined list/video URLs are handled by the playlist IE
424 WL # WL are handled by the watch later IE
427 (?(1).+)? # if we found the ID, everything can follow
428 $""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
429 _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
431 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
432 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
433 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
434 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
435 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
436 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
437 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
438 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
439 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
440 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
441 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
442 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
443 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
444 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
445 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
446 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
447 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
448 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
452 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
453 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
454 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
455 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
456 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
457 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
458 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
460 # Apple HTTP Live Streaming
461 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
462 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
463 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
464 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
465 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
466 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
467 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
468 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
471 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
472 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
473 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
474 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
475 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
476 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
477 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
478 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
479 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
480 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
481 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
482 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
485 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
486 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
487 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
488 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
489 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
490 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
491 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
494 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
495 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
496 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
497 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
498 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
499 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
500 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
501 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
502 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
503 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
504 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
505 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
506 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
507 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
508 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
509 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
510 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
511 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
512 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
513 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
514 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
515 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
518 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
519 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
521 # Dash webm audio with opus inside
522 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
523 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
524 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
527 '_rtmp': {'protocol': 'rtmp'},
529 # av01 video only formats sometimes served with "unknown" codecs
530 '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
531 '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
532 '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
533 '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
535 _SUBTITLE_FORMATS = ('srv1', 'srv2', 'srv3', 'ttml', 'vtt')
542 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
546 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
547 'uploader': 'Philipp Hagemeister',
548 'uploader_id': 'phihag',
549 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
550 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
551 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
552 'upload_date': '20121002',
553 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
554 'categories': ['Science & Technology'],
555 'tags': ['youtube-dl'],
559 'dislike_count': int,
565 'url': 'https://www.youtube.com/watch?v=UxxajLWwzqY',
566 'note': 'Test generic use_cipher_signature video (#897)',
570 'upload_date': '20120506',
571 'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
572 'alt_title': 'I Love It (feat. Charli XCX)',
573 'description': 'md5:f3ceb5ef83a08d95b9d146f973157cc8',
574 'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
575 'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
576 'iconic ep', 'iconic', 'love', 'it'],
578 'uploader': 'Icona Pop',
579 'uploader_id': 'IconaPop',
580 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IconaPop',
581 'creator': 'Icona Pop',
582 'track': 'I Love It (feat. Charli XCX)',
583 'artist': 'Icona Pop',
587 'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ',
588 'note': 'Test VEVO video with age protection (#956)',
592 'upload_date': '20130703',
593 'title': 'Justin Timberlake - Tunnel Vision (Official Music Video) (Explicit)',
594 'alt_title': 'Tunnel Vision',
595 'description': 'md5:07dab3356cde4199048e4c7cd93471e1',
597 'uploader': 'justintimberlakeVEVO',
598 'uploader_id': 'justintimberlakeVEVO',
599 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/justintimberlakeVEVO',
600 'creator': 'Justin Timberlake',
601 'track': 'Tunnel Vision',
602 'artist': 'Justin Timberlake',
607 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
608 'note': 'Embed-only video (#1746)',
612 'upload_date': '20120608',
613 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
614 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
615 'uploader': 'SET India',
616 'uploader_id': 'setindia',
617 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
622 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=UxxajLWwzqY',
623 'note': 'Use the first video ID in the URL',
627 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
628 'uploader': 'Philipp Hagemeister',
629 'uploader_id': 'phihag',
630 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
631 'upload_date': '20121002',
632 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
633 'categories': ['Science & Technology'],
634 'tags': ['youtube-dl'],
638 'dislike_count': int,
641 'skip_download': True,
645 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
646 'note': '256k DASH audio (format 141) via DASH manifest',
650 'upload_date': '20121002',
651 'uploader_id': '8KVIDEO',
652 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
654 'uploader': '8KVIDEO',
655 'title': 'UHDTV TEST 8K VIDEO.mp4'
658 'youtube_include_dash_manifest': True,
661 'skip': 'format 141 not served anymore',
663 # DASH manifest with encrypted signature
665 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
669 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
670 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
672 'uploader': 'AfrojackVEVO',
673 'uploader_id': 'AfrojackVEVO',
674 'upload_date': '20131011',
677 'youtube_include_dash_manifest': True,
678 'format': '141/bestaudio[ext=m4a]',
681 # JS player signature function name containing $
683 'url': 'https://www.youtube.com/watch?v=nfWlot6h_JM',
687 'title': 'Taylor Swift - Shake It Off',
688 'description': 'md5:bec2185232c05479482cb5a9b82719bf',
690 'uploader': 'TaylorSwiftVEVO',
691 'uploader_id': 'TaylorSwiftVEVO',
692 'upload_date': '20140818',
693 'creator': 'Taylor Swift',
696 'youtube_include_dash_manifest': True,
697 'format': '141/bestaudio[ext=m4a]',
702 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
707 'upload_date': '20100909',
708 'uploader': 'Amazing Atheist',
709 'uploader_id': 'TheAmazingAtheist',
710 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
711 'title': 'Burning Everyone\'s Koran',
712 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
715 # Normal age-gate video (No vevo, embed allowed)
717 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
721 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
722 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
724 'uploader': 'The Witcher',
725 'uploader_id': 'WitcherGame',
726 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
727 'upload_date': '20140605',
731 # Age-gate video with encrypted signature
733 'url': 'https://www.youtube.com/watch?v=6kLq3WMV1nU',
737 'title': 'Dedication To My Ex (Miss That) (Lyric Video)',
738 'description': 'md5:33765bb339e1b47e7e72b5490139bb41',
740 'uploader': 'LloydVEVO',
741 'uploader_id': 'LloydVEVO',
742 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/LloydVEVO',
743 'upload_date': '20110629',
747 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
748 # YouTube Red ad is not captured for creator
750 'url': '__2ABJjxzNo',
755 'upload_date': '20100430',
756 'uploader_id': 'deadmau5',
757 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
758 'creator': 'deadmau5',
759 'description': 'md5:12c56784b8032162bb936a5f76d55360',
760 'uploader': 'deadmau5',
761 'title': 'Deadmau5 - Some Chords (HD)',
762 'alt_title': 'Some Chords',
764 'expected_warnings': [
765 'DASH manifest missing',
768 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
770 'url': 'lqQg6PlCWgI',
775 'upload_date': '20150827',
776 'uploader_id': 'olympic',
777 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
778 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
779 'uploader': 'Olympic',
780 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
783 'skip_download': 'requires avconv',
788 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
792 'stretched_ratio': 16 / 9.,
794 'upload_date': '20110310',
795 'uploader_id': 'AllenMeow',
796 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
797 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
799 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
802 # url_encoded_fmt_stream_map is empty string
804 'url': 'qEJwOuvDf7I',
808 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
810 'upload_date': '20150404',
811 'uploader_id': 'spbelect',
812 'uploader': 'Наблюдатели Петербурга',
815 'skip_download': 'requires avconv',
817 'skip': 'This live event has ended.',
819 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
821 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
825 'title': 'md5:7b81415841e02ecd4313668cde88737a',
826 'description': 'md5:116377fd2963b81ec4ce64b542173306',
828 'upload_date': '20150625',
829 'uploader_id': 'dorappi2000',
830 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
831 'uploader': 'dorappi2000',
832 'formats': 'mincount:31',
834 'skip': 'not actual anymore',
836 # DASH manifest with segment_list
838 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
839 'md5': '8ce563a1d667b599d21064e982ab9e31',
843 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
844 'uploader': 'Airtek',
845 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
846 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
847 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
850 'youtube_include_dash_manifest': True,
851 'format': '135', # bestvideo
853 'skip': 'This live event has ended.',
856 # Multifeed videos (multiple cameras), URL is for Main Camera
857 'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
860 'title': 'teamPGP: Rocket League Noob Stream',
861 'description': 'md5:dc7872fb300e143831327f1bae3af010',
867 'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
868 'description': 'md5:dc7872fb300e143831327f1bae3af010',
870 'upload_date': '20150721',
871 'uploader': 'Beer Games Beer',
872 'uploader_id': 'beergamesbeer',
873 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
874 'license': 'Standard YouTube License',
880 'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
881 'description': 'md5:dc7872fb300e143831327f1bae3af010',
883 'upload_date': '20150721',
884 'uploader': 'Beer Games Beer',
885 'uploader_id': 'beergamesbeer',
886 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
887 'license': 'Standard YouTube License',
893 'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
894 'description': 'md5:dc7872fb300e143831327f1bae3af010',
896 'upload_date': '20150721',
897 'uploader': 'Beer Games Beer',
898 'uploader_id': 'beergamesbeer',
899 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
900 'license': 'Standard YouTube License',
906 'title': 'teamPGP: Rocket League Noob Stream (zim)',
907 'description': 'md5:dc7872fb300e143831327f1bae3af010',
909 'upload_date': '20150721',
910 'uploader': 'Beer Games Beer',
911 'uploader_id': 'beergamesbeer',
912 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
913 'license': 'Standard YouTube License',
917 'skip_download': True,
919 'skip': 'This video is not available.',
922 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
923 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
926 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
929 'skip': 'Not multifeed anymore',
932 'url': 'https://vid.plus/FlRa-iH7PGw',
933 'only_matching': True,
936 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
937 'only_matching': True,
940 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
941 # Also tests cut-off URL expansion in video description (see
942 # https://github.com/ytdl-org/youtube-dl/issues/1892,
943 # https://github.com/ytdl-org/youtube-dl/issues/8164)
944 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
948 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
949 'alt_title': 'Dark Walk - Position Music',
950 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
952 'upload_date': '20151119',
953 'uploader_id': 'IronSoulElf',
954 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
955 'uploader': 'IronSoulElf',
956 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
957 'track': 'Dark Walk - Position Music',
958 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
959 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
962 'skip_download': True,
966 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
967 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
968 'only_matching': True,
971 # Video with yt:stretch=17:0
972 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
976 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
977 'description': 'md5:ee18a25c350637c8faff806845bddee9',
978 'upload_date': '20151107',
979 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
980 'uploader': 'CH GAMER DROID',
983 'skip_download': True,
985 'skip': 'This video does not exist.',
988 # Video licensed under Creative Commons
989 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
993 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
994 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
996 'upload_date': '20150127',
997 'uploader_id': 'BerkmanCenter',
998 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
999 'uploader': 'The Berkman Klein Center for Internet & Society',
1000 'license': 'Creative Commons Attribution license (reuse allowed)',
1003 'skip_download': True,
1007 # Channel-like uploader_url
1008 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1010 'id': 'eQcmzGIKrzg',
1012 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
1013 'description': 'md5:dda0d780d5a6e120758d1711d062a867',
1015 'upload_date': '20151119',
1016 'uploader': 'Bernie Sanders',
1017 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1018 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
1019 'license': 'Creative Commons Attribution license (reuse allowed)',
1022 'skip_download': True,
1026 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;v=V36LpHqtcDY',
1027 'only_matching': True,
1030 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
1031 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1032 'only_matching': True,
1035 # Rental video preview
1036 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1038 'id': 'uGpuVWrhIzE',
1040 'title': 'Piku - Trailer',
1041 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1042 'upload_date': '20150811',
1043 'uploader': 'FlixMatrix',
1044 'uploader_id': 'FlixMatrixKaravan',
1045 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
1046 'license': 'Standard YouTube License',
1049 'skip_download': True,
1051 'skip': 'This video is not available.',
1054 # YouTube Red video with episode data
1055 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1057 'id': 'iqKdEhx-dD4',
1059 'title': 'Isolation - Mind Field (Ep 1)',
1060 'description': 'md5:46a29be4ceffa65b92d277b93f463c0f',
1062 'upload_date': '20170118',
1063 'uploader': 'Vsauce',
1064 'uploader_id': 'Vsauce',
1065 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
1066 'series': 'Mind Field',
1068 'episode_number': 1,
1071 'skip_download': True,
1073 'expected_warnings': [
1074 'Skipping DASH manifest',
1078 # The following content has been identified by the YouTube community
1079 # as inappropriate or offensive to some audiences.
1080 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1082 'id': '6SJNVb0GnPI',
1084 'title': 'Race Differences in Intelligence',
1085 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1087 'upload_date': '20140124',
1088 'uploader': 'New Century Foundation',
1089 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1090 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1093 'skip_download': True,
1098 'url': '1t24XAntNCY',
1099 'only_matching': True,
1102 # geo restricted to JP
1103 'url': 'sJL6WA-aGkQ',
1104 'only_matching': True,
1107 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
1108 'only_matching': True,
1111 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1112 'only_matching': True,
1116 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1117 'only_matching': True,
1120 # Video with unsupported adaptive stream type formats
1121 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1123 'id': 'Z4Vy8R84T1U',
1125 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1126 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1128 'upload_date': '20130923',
1129 'uploader': 'Amelia Putri Harwita',
1130 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1131 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1132 'formats': 'maxcount:10',
1135 'skip_download': True,
1136 'youtube_include_dash_manifest': False,
1140 # Youtube Music Auto-generated description
1141 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1143 'id': 'MgNrAu2pzNs',
1145 'title': 'Voyeur Girl',
1146 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1147 'upload_date': '20190312',
1148 'uploader': 'Various Artists - Topic',
1149 'uploader_id': 'UCVWKBi1ELZn0QX2CBLSkiyw',
1150 'artist': 'Stephen',
1151 'track': 'Voyeur Girl',
1152 'album': 'it\'s too much love to know my dear',
1153 'release_date': '20190313',
1154 'release_year': 2019,
1157 'skip_download': True,
1161 # Youtube Music Auto-generated description
1162 # Retrieve 'artist' field from 'Artist:' in video description
1163 # when it is present on youtube music video
1164 'url': 'https://www.youtube.com/watch?v=k0jLE7tTwjY',
1166 'id': 'k0jLE7tTwjY',
1168 'title': 'Latch Feat. Sam Smith',
1169 'description': 'md5:3cb1e8101a7c85fcba9b4fb41b951335',
1170 'upload_date': '20150110',
1171 'uploader': 'Various Artists - Topic',
1172 'uploader_id': 'UCNkEcmYdjrH4RqtNgh7BZ9w',
1173 'artist': 'Disclosure',
1174 'track': 'Latch Feat. Sam Smith',
1175 'album': 'Latch Featuring Sam Smith',
1176 'release_date': '20121008',
1177 'release_year': 2012,
1180 'skip_download': True,
1184 # Youtube Music Auto-generated description
1185 # handle multiple artists on youtube music video
1186 'url': 'https://www.youtube.com/watch?v=74qn0eJSjpA',
1188 'id': '74qn0eJSjpA',
1190 'title': 'Eastside',
1191 'description': 'md5:290516bb73dcbfab0dcc4efe6c3de5f2',
1192 'upload_date': '20180710',
1193 'uploader': 'Benny Blanco - Topic',
1194 'uploader_id': 'UCzqz_ksRu_WkIzmivMdIS7A',
1195 'artist': 'benny blanco, Halsey, Khalid',
1196 'track': 'Eastside',
1197 'album': 'Eastside',
1198 'release_date': '20180713',
1199 'release_year': 2018,
1202 'skip_download': True,
1206 # Youtube Music Auto-generated description
1207 # handle youtube music video with release_year and no release_date
1208 'url': 'https://www.youtube.com/watch?v=-hcAI0g-f5M',
1210 'id': '-hcAI0g-f5M',
1212 'title': 'Put It On Me',
1213 'description': 'md5:93c55acc682ae7b0c668f2e34e1c069e',
1214 'upload_date': '20180426',
1215 'uploader': 'Matt Maeson - Topic',
1216 'uploader_id': 'UCnEkIGqtGcQMLk73Kp-Q5LQ',
1217 'artist': 'Matt Maeson',
1218 'track': 'Put It On Me',
1219 'album': 'The Hearse',
1220 'release_date': None,
1221 'release_year': 2018,
1224 'skip_download': True,
1228 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1229 'only_matching': True,
    def __init__(self, *args, **kwargs):
        super(YoutubeIE, self).__init__(*args, **kwargs)
        # Lazily-populated cache of signature-decryption functions,
        # keyed per player/signature layout (see _decrypt_signature).
        self._player_cache = {}
1237 def report_video_info_webpage_download(self, video_id):
1238 """Report attempt to download video info webpage."""
1239 self.to_screen('%s: Downloading video info webpage' % video_id)
1241 def report_information_extraction(self, video_id):
1242 """Report attempt to extract video information."""
1243 self.to_screen('%s: Extracting video information' % video_id)
1245 def report_unavailable_format(self, video_id, format):
1246 """Report extracted video URL."""
1247 self.to_screen('%s: Format %s not available' % (video_id, format))
1249 def report_rtmp_download(self):
1250 """Indicate the download will use the RTMP protocol."""
1251 self.to_screen('RTMP download detected')
1253 def _signature_cache_id(self, example_sig):
1254 """ Return a string representation of a signature """
1255 return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
    def _extract_signature_function(self, video_id, player_url, example_sig):
        # Build a signature-decryption function for the given player.
        # Results are persisted in the filesystem cache so the player does
        # not have to be re-downloaded and re-parsed on every run.
            r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|(?:/[a-z]{2,3}_[A-Z]{2})?/base)?\.(?P<ext>[a-z]+)$',
            raise ExtractorError('Cannot identify player %r' % player_url)
        player_type = id_m.group('ext')  # player flavour: 'js' or 'swf'
        player_id = id_m.group('id')

        # Read from filesystem cache
        func_id = '%s_%s_%s' % (
            player_type, player_id, self._signature_cache_id(example_sig))
        # The cache id doubles as a file name, so it must not contain
        # path separators.
        assert os.path.basename(func_id) == func_id
        cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
        if cache_spec is not None:
            # Cached spec is a list of indices describing the character
            # permutation the player applies to the signature string.
            return lambda s: ''.join(s[i] for i in cache_spec)
            'Downloading player %s' % player_url
            if self._downloader.params.get('verbose') else
            'Downloading %s player %s' % (player_type, player_id)
        if player_type == 'js':
            code = self._download_webpage(
                player_url, video_id,
                errnote='Download of %s failed' % player_url)
            res = self._parse_sig_js(code)
        elif player_type == 'swf':
            urlh = self._request_webpage(
                player_url, video_id,
                errnote='Download of %s failed' % player_url)
            res = self._parse_sig_swf(code)
            assert False, 'Invalid player type %r' % player_type
        # Trace the extracted function on a probe string once to recover the
        # index permutation it applies, then store that spec for future runs.
        test_string = ''.join(map(compat_chr, range(len(example_sig))))
        cache_res = res(test_string)
        cache_spec = [ord(c) for c in cache_res]
        self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
    def _print_sig_code(self, func, example_sig):
        """Print the deciphering function as equivalent Python source
        (debug aid for the youtube_print_sig_code option)."""
        def gen_sig_code(idxs):
            # Emit compact slice expressions for runs of evenly-spaced
            # indices, single-element lookups otherwise.
            def _genslice(start, end, step):
                starts = '' if start == 0 else str(start)
                ends = (':%d' % (end + step)) if end + step >= 0 else ':'
                steps = '' if step == 1 else (':%d' % step)
                return 's[%s%s%s]' % (starts, ends, steps)
            # Quelch pyflakes warnings - start will be set when step is set
            start = '(Never used)'
            for i, prev in zip(idxs[1:], idxs[:-1]):
                if step is not None:
                    if i - prev == step:
                        yield _genslice(start, prev, step)
                    if i - prev in [-1, 1]:
                    yield 's[%d]' % prev
                yield _genslice(start, i, step)
        # Run the function over a probe string to recover the permutation
        # of indices it applies.
        test_string = ''.join(map(compat_chr, range(len(example_sig))))
        cache_res = func(test_string)
        cache_spec = [ord(c) for c in cache_res]
        expr_code = ' + '.join(gen_sig_code(cache_spec))
        signature_id_tuple = '(%s)' % (
            ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
        code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
                ' return %s\n') % (signature_id_tuple, expr_code)
        self.to_screen('Extracted signature function:\n' + code)
    def _parse_sig_js(self, jscode):
        """Find the signature-scrambling function in the player JS and
        return a callable that applies it to a signature string."""
        funcname = self._search_regex(
            (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
             # Obsolete patterns
             r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
             r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
            jscode, 'Initial JS player signature function name', group='sig')
        # Interpret the player code and extract the function found above.
        jsi = JSInterpreter(jscode)
        initial_function = jsi.extract_function(funcname)
        # The interpreter expects the argument list as a sequence.
        return lambda s: initial_function([s])
1362 def _parse_sig_swf(self, file_contents):
1363 swfi = SWFInterpreter(file_contents)
1364 TARGET_CLASSNAME = 'SignatureDecipher'
1365 searched_class = swfi.extract_class(TARGET_CLASSNAME)
1366 initial_function = swfi.extract_function(searched_class, 'decipher')
1367 return lambda s: initial_function([s])
    def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
        """Turn the encrypted s field into a working signature"""
        if player_url is None:
            raise ExtractorError('Cannot decrypt signature without player_url')

        # Normalize scheme-relative and relative player URLs to absolute.
        if player_url.startswith('//'):
            player_url = 'https:' + player_url
        elif not re.match(r'https?://', player_url):
            player_url = compat_urlparse.urljoin(
                'https://www.youtube.com', player_url)
            # Extract and memoize the decryption function per player and
            # signature layout.
            player_id = (player_url, self._signature_cache_id(s))
            if player_id not in self._player_cache:
                func = self._extract_signature_function(
                    video_id, player_url, s
                self._player_cache[player_id] = func
            func = self._player_cache[player_id]
            if self._downloader.params.get('youtube_print_sig_code'):
                self._print_sig_code(func, s)
        except Exception as e:
            # Re-raise with the traceback embedded so users can report it.
            tb = traceback.format_exc()
            raise ExtractorError(
                'Signature extraction failed: ' + tb, cause=e)
    def _get_subtitles(self, video_id, webpage):
        """Fetch the list of manually-created subtitles for video_id from
        the timedtext service; returns a dict mapping language code to a
        list of subtitle format dicts."""
            subs_doc = self._download_xml(
                'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
                video_id, note=False)
        except ExtractorError as err:
            self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err))
        for track in subs_doc.findall('track'):
            lang = track.attrib['lang_code']
            if lang in sub_lang_list:
            # One entry per supported subtitle format for this language.
            for ext in self._SUBTITLE_FORMATS:
                params = compat_urllib_parse_urlencode({
                    'name': track.attrib['name'].encode('utf-8'),
                sub_formats.append({
                    'url': 'https://www.youtube.com/api/timedtext?' + params,
                sub_lang_list[lang] = sub_formats
        if not sub_lang_list:
            self._downloader.report_warning('video doesn\'t have subtitles')
        return sub_lang_list
    def _get_ytplayer_config(self, video_id, webpage):
        """Locate and parse the ytplayer.config JSON object embedded in the
        watch page; may return None when it cannot be found or parsed."""
            # User data may contain arbitrary character sequences that may affect
            # JSON extraction with regex, e.g. when '};' is contained the second
            # regex won't capture the whole JSON. Yet working around by trying more
            # concrete regex first keeping in mind proper quoted string handling
            # to be implemented in future that will replace this workaround (see
            # https://github.com/ytdl-org/youtube-dl/issues/7468,
            # https://github.com/ytdl-org/youtube-dl/pull/7599)
            r';ytplayer\.config\s*=\s*({.+?});ytplayer',
            r';ytplayer\.config\s*=\s*({.+?});',
        config = self._search_regex(
            patterns, webpage, 'ytplayer.config', default=None)
            return self._parse_json(
                uppercase_escape(config), video_id, fatal=False)
    def _get_automatic_captions(self, video_id, webpage):
        """Extract automatically-generated captions for video_id.

        We need the webpage for getting the captions url, so it is passed
        in as an argument to speed up the process.
        """
        self.to_screen('%s: Looking for automatic captions' % video_id)
        player_config = self._get_ytplayer_config(video_id, webpage)
        err_msg = 'Couldn\'t find automatic captions for %s' % video_id
        if not player_config:
            self._downloader.report_warning(err_msg)
            args = player_config['args']
            # Legacy flow: 'ttsurl' points directly at the timedtext service.
            caption_url = args.get('ttsurl')
                timestamp = args['timestamp']
                # We get the available subtitles
                list_params = compat_urllib_parse_urlencode({
                list_url = caption_url + '&' + list_params
                caption_list = self._download_xml(list_url, video_id)
                # The first <track> node describes the original caption language.
                original_lang_node = caption_list.find('track')
                if original_lang_node is None:
                    self._downloader.report_warning('Video doesn\'t have automatic captions')
                original_lang = original_lang_node.attrib['lang_code']
                caption_kind = original_lang_node.attrib.get('kind', '')
                # One entry per translation target language and format.
                for lang_node in caption_list.findall('target'):
                    sub_lang = lang_node.attrib['lang_code']
                    for ext in self._SUBTITLE_FORMATS:
                        params = compat_urllib_parse_urlencode({
                            'lang': original_lang,
                            'kind': caption_kind,
                        sub_formats.append({
                            'url': caption_url + '&' + params,
                    sub_lang_list[sub_lang] = sub_formats
                return sub_lang_list

            def make_captions(sub_url, sub_langs):
                # Build one caption entry per language/format combination by
                # rewriting the query string of the base caption URL.
                parsed_sub_url = compat_urllib_parse_urlparse(sub_url)
                caption_qs = compat_parse_qs(parsed_sub_url.query)
                for sub_lang in sub_langs:
                    for ext in self._SUBTITLE_FORMATS:
                            'tlang': [sub_lang],
                        sub_url = compat_urlparse.urlunparse(parsed_sub_url._replace(
                            query=compat_urllib_parse_urlencode(caption_qs, True)))
                        sub_formats.append({
                    captions[sub_lang] = sub_formats

            # New captions format as of 22.06.2017
            player_response = args.get('player_response')
            if player_response and isinstance(player_response, compat_str):
                player_response = self._parse_json(
                    player_response, video_id, fatal=False)
                renderer = player_response['captions']['playerCaptionsTracklistRenderer']
                base_url = renderer['captionTracks'][0]['baseUrl']
                for lang in renderer['translationLanguages']:
                    lang_code = lang.get('languageCode')
                        sub_lang_list.append(lang_code)
                return make_captions(base_url, sub_lang_list)
            # Some videos don't provide ttsurl but rather caption_tracks and
            # caption_translation_languages (e.g. 20LmZk1hakA)
            # Not used anymore as of 22.06.2017
            caption_tracks = args['caption_tracks']
            caption_translation_languages = args['caption_translation_languages']
            caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
            for lang in caption_translation_languages.split(','):
                lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang))
                sub_lang = lang_qs.get('lc', [None])[0]
                    sub_lang_list.append(sub_lang)
            return make_captions(caption_url, sub_lang_list)
        # An extractor error can be raised by the download process if there
        # are no automatic captions but there are subtitles
        except (KeyError, IndexError, ExtractorError):
            self._downloader.report_warning(err_msg)
    def _mark_watched(self, video_id, video_info, player_response):
        """Ping YouTube's playback-tracking URL so the video is marked as
        watched (best effort; silently does nothing when no URL is found)."""
        playback_url = url_or_none(try_get(
            lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']) or try_get(
            video_info, lambda x: x['videostats_playback_base_url'][0]))
        if not playback_url:
        parsed_playback_url = compat_urlparse.urlparse(playback_url)
        qs = compat_urlparse.parse_qs(parsed_playback_url.query)

        # cpn generation algorithm is reverse engineered from base.js.
        # In fact it works even with dummy cpn.
        CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
        cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))

        playback_url = compat_urlparse.urlunparse(
            parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))

        # Fire-and-forget request; failures are non-fatal.
        self._download_webpage(
            playback_url, video_id, 'Marking watched',
            'Unable to mark watched', fatal=False)
    def _extract_urls(webpage):
        """Collect embedded YouTube URLs/video ids found in webpage."""
        # Embedded YouTube player
            unescapeHTML(mobj.group('url'))
            for mobj in re.finditer(r'''(?x)
            (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
            (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)

        # lazyYT YouTube embed
        entries.extend(list(map(
            re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))

        # Wordpress "YouTube Video Importer" plugin
        matches = re.findall(r'''(?x)<div[^>]+
            class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
            data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
        # The last capture group of each match holds the video id.
        entries.extend(m[-1] for m in matches)
1607 def _extract_url(webpage):
1608 urls = YoutubeIE._extract_urls(webpage)
1609 return urls[0] if urls else None
    def extract_id(cls, url):
        """Extract the video id from a YouTube URL.

        Raises ExtractorError when the URL does not match _VALID_URL.
        """
        mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
            raise ExtractorError('Invalid URL: %s' % url)
        # Capture group 2 of _VALID_URL holds the video id.
        video_id = mobj.group(2)
    def _extract_chapters(description, duration):
        """Parse chapter markers (seekTo anchor links with timestamps) out
        of the HTML video description."""
        chapter_lines = re.findall(
            r'(?:^|<br\s*/>)([^<]*<a[^>]+onclick=["\']yt\.www\.watch\.player\.seekTo[^>]+>(\d{1,2}:\d{1,2}(?::\d{1,2})?)</a>[^>]*)(?=$|<br\s*/>)',
        if not chapter_lines:
        for next_num, (chapter_line, time_point) in enumerate(
                chapter_lines, start=1):
            start_time = parse_duration(time_point)
            if start_time is None:
            # Guard against timestamps beyond the video duration.
            if start_time > duration:
            # A chapter ends where the next one starts; the last chapter
            # ends at the full video duration.
            end_time = (duration if next_num == len(chapter_lines)
                        else parse_duration(chapter_lines[next_num][1]))
            if end_time is None:
            if end_time > duration:
            if start_time > end_time:
            # Strip the anchor markup and collapse whitespace for the title.
            chapter_title = re.sub(
                r'<a[^>]+>[^<]+</a>', '', chapter_line).strip(' \t-')
            chapter_title = re.sub(r'\s+', ' ', chapter_title)
                'start_time': start_time,
                'end_time': end_time,
                'title': chapter_title,
1654 def _real_extract(self, url):
1655 url, smuggled_data = unsmuggle_url(url, {})
1658 'http' if self._downloader.params.get('prefer_insecure', False)
1663 parsed_url = compat_urllib_parse_urlparse(url)
1664 for component in [parsed_url.fragment, parsed_url.query]:
1665 query = compat_parse_qs(component)
1666 if start_time is None and 't' in query:
1667 start_time = parse_duration(query['t'][0])
1668 if start_time is None and 'start' in query:
1669 start_time = parse_duration(query['start'][0])
1670 if end_time is None and 'end' in query:
1671 end_time = parse_duration(query['end'][0])
1673 # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1674 mobj = re.search(self._NEXT_URL_RE, url)
1676 url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
1677 video_id = self.extract_id(url)
1680 url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
1681 video_webpage = self._download_webpage(url, video_id)
1683 # Attempt to extract SWF player URL
1684 mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
1685 if mobj is not None:
1686 player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1692 def add_dash_mpd(video_info):
1693 dash_mpd = video_info.get('dashmpd')
1694 if dash_mpd and dash_mpd[0] not in dash_mpds:
1695 dash_mpds.append(dash_mpd[0])
1697 def add_dash_mpd_pr(pl_response):
1698 dash_mpd = url_or_none(try_get(
1699 pl_response, lambda x: x['streamingData']['dashManifestUrl'],
1701 if dash_mpd and dash_mpd not in dash_mpds:
1702 dash_mpds.append(dash_mpd)
1707 def extract_view_count(v_info):
1708 return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
1710 def extract_token(v_info):
1711 return dict_get(v_info, ('account_playback_token', 'accountPlaybackToken', 'token'))
1713 def extract_player_response(player_response, video_id):
1714 pl_response = str_or_none(player_response)
1717 pl_response = self._parse_json(pl_response, video_id, fatal=False)
1718 if isinstance(pl_response, dict):
1719 add_dash_mpd_pr(pl_response)
1722 player_response = {}
1725 embed_webpage = None
1726 if re.search(r'player-age-gate-content">', video_webpage) is not None:
1728 # We simulate the access to the video from www.youtube.com/v/{video_id}
1729 # this can be viewed without login into Youtube
1730 url = proto + '://www.youtube.com/embed/%s' % video_id
1731 embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
1732 data = compat_urllib_parse_urlencode({
1733 'video_id': video_id,
1734 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1735 'sts': self._search_regex(
1736 r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
1738 video_info_url = proto + '://www.youtube.com/get_video_info?' + data
1739 video_info_webpage = self._download_webpage(
1740 video_info_url, video_id,
1741 note='Refetching age-gated info webpage',
1742 errnote='unable to download video info webpage')
1743 video_info = compat_parse_qs(video_info_webpage)
1744 pl_response = video_info.get('player_response', [None])[0]
1745 player_response = extract_player_response(pl_response, video_id)
1746 add_dash_mpd(video_info)
1747 view_count = extract_view_count(video_info)
1752 # Try looking directly into the video webpage
1753 ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
1755 args = ytplayer_config['args']
1756 if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
1757 # Convert to the same format returned by compat_parse_qs
1758 video_info = dict((k, [v]) for k, v in args.items())
1759 add_dash_mpd(video_info)
1760 # Rental video is not rented but preview is available (e.g.
1761 # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
1762 # https://github.com/ytdl-org/youtube-dl/issues/10532)
1763 if not video_info and args.get('ypc_vid'):
1764 return self.url_result(
1765 args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
1766 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1768 sts = ytplayer_config.get('sts')
1769 if not player_response:
1770 player_response = extract_player_response(args.get('player_response'), video_id)
1771 if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
1772 add_dash_mpd_pr(player_response)
1773 # We also try looking in get_video_info since it may contain different dashmpd
1774 # URL that points to a DASH manifest with possibly different itag set (some itags
1775 # are missing from DASH manifest pointed by webpage's dashmpd, some - from DASH
1776 # manifest pointed by get_video_info's dashmpd).
1777 # The general idea is to take a union of itags of both DASH manifests (for example
1778 # video with such 'manifest behavior' see https://github.com/ytdl-org/youtube-dl/issues/6093)
1779 self.report_video_info_webpage_download(video_id)
1780 for el in ('embedded', 'detailpage', 'vevo', ''):
1782 'video_id': video_id,
1792 video_info_webpage = self._download_webpage(
1793 '%s://www.youtube.com/get_video_info' % proto,
1794 video_id, note=False,
1795 errnote='unable to download video info webpage',
1796 fatal=False, query=query)
1797 if not video_info_webpage:
1799 get_video_info = compat_parse_qs(video_info_webpage)
1800 if not player_response:
1801 pl_response = get_video_info.get('player_response', [None])[0]
1802 player_response = extract_player_response(pl_response, video_id)
1803 add_dash_mpd(get_video_info)
1804 if view_count is None:
1805 view_count = extract_view_count(get_video_info)
1807 video_info = get_video_info
1808 get_token = extract_token(get_video_info)
1810 # Different get_video_info requests may report different results, e.g.
1811 # some may report video unavailability, but some may serve it without
1812 # any complaint (see https://github.com/ytdl-org/youtube-dl/issues/7362,
1813 # the original webpage as well as el=info and el=embedded get_video_info
1814 # requests report video unavailability due to geo restriction while
1815 # el=detailpage succeeds and returns valid data). This is probably
1816 # due to YouTube measures against IP ranges of hosting providers.
1817 # Working around by preferring the first succeeded video_info containing
1818 # the token if no such video_info yet was found.
1819 token = extract_token(video_info)
1821 video_info = get_video_info
1824 def extract_unavailable_message():
1826 for tag, kind in (('h1', 'message'), ('div', 'submessage')):
1827 msg = self._html_search_regex(
1828 r'(?s)<{tag}[^>]+id=["\']unavailable-{kind}["\'][^>]*>(.+?)</{tag}>'.format(tag=tag, kind=kind),
1829 video_webpage, 'unavailable %s' % kind, default=None)
1831 messages.append(msg)
1833 return '\n'.join(messages)
1836 unavailable_message = extract_unavailable_message()
1837 if not unavailable_message:
1838 unavailable_message = 'Unable to extract video data'
1839 raise ExtractorError(
1840 'YouTube said: %s' % unavailable_message, expected=True, video_id=video_id)
1842 video_details = try_get(
1843 player_response, lambda x: x['videoDetails'], dict) or {}
1845 video_title = video_info.get('title', [None])[0] or video_details.get('title')
1847 self._downloader.report_warning('Unable to extract video title')
1850 description_original = video_description = get_element_by_id("eow-description", video_webpage)
1851 if video_description:
1854 redir_url = compat_urlparse.urljoin(url, m.group(1))
1855 parsed_redir_url = compat_urllib_parse_urlparse(redir_url)
1856 if re.search(r'^(?:www\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)$', parsed_redir_url.netloc) and parsed_redir_url.path == '/redirect':
1857 qs = compat_parse_qs(parsed_redir_url.query)
1863 description_original = video_description = re.sub(r'''(?x)
1865 (?:[a-zA-Z-]+="[^"]*"\s+)*?
1866 (?:title|href)="([^"]+)"\s+
1867 (?:[a-zA-Z-]+="[^"]*"\s+)*?
1871 ''', replace_url, video_description)
1872 video_description = clean_html(video_description)
1874 video_description = self._html_search_meta('description', video_webpage) or video_details.get('shortDescription')
1876 if not smuggled_data.get('force_singlefeed', False):
1877 if not self._downloader.params.get('noplaylist'):
1878 multifeed_metadata_list = try_get(
1880 lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
1881 compat_str) or try_get(
1882 video_info, lambda x: x['multifeed_metadata_list'][0], compat_str)
1883 if multifeed_metadata_list:
1886 for feed in multifeed_metadata_list.split(','):
1887 # Unquote should take place before split on comma (,) since textual
1888 # fields may contain comma as well (see
1889 # https://github.com/ytdl-org/youtube-dl/issues/8536)
1890 feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
1892 '_type': 'url_transparent',
1893 'ie_key': 'Youtube',
1895 '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
1896 {'force_singlefeed': True}),
1897 'title': '%s (%s)' % (video_title, feed_data['title'][0]),
1899 feed_ids.append(feed_data['id'][0])
1901 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
1902 % (', '.join(feed_ids), video_id))
1903 return self.playlist_result(entries, video_id, video_title, video_description)
1905 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
1907 if view_count is None:
1908 view_count = extract_view_count(video_info)
1909 if view_count is None and video_details:
1910 view_count = int_or_none(video_details.get('viewCount'))
1913 is_live = bool_or_none(video_details.get('isLive'))
1915 # Check for "rental" videos
1916 if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
1917 raise ExtractorError('"rental" videos not supported. See https://github.com/ytdl-org/youtube-dl/issues/359 for more information.', expected=True)
1919 def _extract_filesize(media_url):
1920 return int_or_none(self._search_regex(
1921 r'\bclen[=/](\d+)', media_url, 'filesize', default=None))
1923 streaming_formats = try_get(player_response, lambda x: x['streamingData']['formats'], list) or []
1924 streaming_formats.extend(try_get(player_response, lambda x: x['streamingData']['adaptiveFormats'], list) or [])
1926 if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1927 self.report_rtmp_download()
1929 'format_id': '_rtmp',
1931 'url': video_info['conn'][0],
1932 'player_url': player_url,
1934 elif not is_live and (streaming_formats or len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1):
1935 encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
1936 if 'rtmpe%3Dyes' in encoded_url_map:
1937 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/ytdl-org/youtube-dl/issues/343 for more information.', expected=True)
1940 fmt_list = video_info.get('fmt_list', [''])[0]
1942 for fmt in fmt_list.split(','):
1943 spec = fmt.split('/')
1945 width_height = spec[1].split('x')
1946 if len(width_height) == 2:
1947 formats_spec[spec[0]] = {
1948 'resolution': spec[1],
1949 'width': int_or_none(width_height[0]),
1950 'height': int_or_none(width_height[1]),
1952 for fmt in streaming_formats:
1953 itag = str_or_none(fmt.get('itag'))
1956 quality = fmt.get('quality')
1957 quality_label = fmt.get('qualityLabel') or quality
1958 formats_spec[itag] = {
1959 'asr': int_or_none(fmt.get('audioSampleRate')),
1960 'filesize': int_or_none(fmt.get('contentLength')),
1961 'format_note': quality_label,
1962 'fps': int_or_none(fmt.get('fps')),
1963 'height': int_or_none(fmt.get('height')),
1964 # bitrate for itag 43 is always 2147483647
1965 'tbr': float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000) if itag != '43' else None,
1966 'width': int_or_none(fmt.get('width')),
1969 for fmt in streaming_formats:
1970 if fmt.get('drm_families'):
1972 url = url_or_none(fmt.get('url'))
1975 cipher = fmt.get('cipher')
1978 url_data = compat_parse_qs(cipher)
1979 url = url_or_none(try_get(url_data, lambda x: x['url'][0], compat_str))
1984 url_data = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
1986 stream_type = int_or_none(try_get(url_data, lambda x: x['stream_type'][0]))
1987 # Unsupported FORMAT_STREAM_TYPE_OTF
1988 if stream_type == 3:
1991 format_id = fmt.get('itag') or url_data['itag'][0]
1994 format_id = compat_str(format_id)
1997 if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
1998 ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
1999 jsplayer_url_json = self._search_regex(
2001 embed_webpage if age_gate else video_webpage,
2002 'JS player URL (1)', default=None)
2003 if not jsplayer_url_json and not age_gate:
2004 # We need the embed website after all
2005 if embed_webpage is None:
2006 embed_url = proto + '://www.youtube.com/embed/%s' % video_id
2007 embed_webpage = self._download_webpage(
2008 embed_url, video_id, 'Downloading embed webpage')
2009 jsplayer_url_json = self._search_regex(
2010 ASSETS_RE, embed_webpage, 'JS player URL')
2012 player_url = json.loads(jsplayer_url_json)
2013 if player_url is None:
2014 player_url_json = self._search_regex(
2015 r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
2016 video_webpage, 'age gate player URL')
2017 player_url = json.loads(player_url_json)
2019 if 'sig' in url_data:
2020 url += '&signature=' + url_data['sig'][0]
2021 elif 's' in url_data:
2022 encrypted_sig = url_data['s'][0]
2024 if self._downloader.params.get('verbose'):
2025 if player_url is None:
2026 player_version = 'unknown'
2027 player_desc = 'unknown'
2029 if player_url.endswith('swf'):
2030 player_version = self._search_regex(
2031 r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
2032 'flash player', fatal=False)
2033 player_desc = 'flash player %s' % player_version
2035 player_version = self._search_regex(
2036 [r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js',
2037 r'(?:www|player(?:_ias)?)-([^/]+)(?:/[a-z]{2,3}_[A-Z]{2})?/base\.js'],
2039 'html5 player', fatal=False)
2040 player_desc = 'html5 player %s' % player_version
2042 parts_sizes = self._signature_cache_id(encrypted_sig)
2043 self.to_screen('{%s} signature length %s, %s' %
2044 (format_id, parts_sizes, player_desc))
2046 signature = self._decrypt_signature(
2047 encrypted_sig, video_id, player_url, age_gate)
2048 sp = try_get(url_data, lambda x: x['sp'][0], compat_str) or 'signature'
2049 url += '&%s=%s' % (sp, signature)
2050 if 'ratebypass' not in url:
2051 url += '&ratebypass=yes'
2054 'format_id': format_id,
2056 'player_url': player_url,
2058 if format_id in self._formats:
2059 dct.update(self._formats[format_id])
2060 if format_id in formats_spec:
2061 dct.update(formats_spec[format_id])
2063 # Some itags are not included in DASH manifest thus corresponding formats will
2064 # lack metadata (see https://github.com/ytdl-org/youtube-dl/pull/5993).
2065 # Trying to extract metadata from url_encoded_fmt_stream_map entry.
2066 mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
2067 width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
2070 width = int_or_none(fmt.get('width'))
2072 height = int_or_none(fmt.get('height'))
2074 filesize = int_or_none(url_data.get(
2075 'clen', [None])[0]) or _extract_filesize(url)
2077 quality = url_data.get('quality', [None])[0] or fmt.get('quality')
2078 quality_label = url_data.get('quality_label', [None])[0] or fmt.get('qualityLabel')
2080 tbr = (float_or_none(url_data.get('bitrate', [None])[0], 1000)
2081 or float_or_none(fmt.get('bitrate'), 1000)) if format_id != '43' else None
2082 fps = int_or_none(url_data.get('fps', [None])[0]) or int_or_none(fmt.get('fps'))
2085 'filesize': filesize,
2090 'format_note': quality_label or quality,
2092 for key, value in more_fields.items():
2095 type_ = url_data.get('type', [None])[0] or fmt.get('mimeType')
2097 type_split = type_.split(';')
2098 kind_ext = type_split[0].split('/')
2099 if len(kind_ext) == 2:
2101 dct['ext'] = mimetype2ext(type_split[0])
2102 if kind in ('audio', 'video'):
2104 for mobj in re.finditer(
2105 r'(?P<key>[a-zA-Z_-]+)=(?P<quote>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):
2106 if mobj.group('key') == 'codecs':
2107 codecs = mobj.group('val')
2110 dct.update(parse_codecs(codecs))
2111 if dct.get('acodec') == 'none' or dct.get('vcodec') == 'none':
2112 dct['downloader_options'] = {
2113 # Youtube throttles chunks >~10M
2114 'http_chunk_size': 10485760,
2119 url_or_none(try_get(
2121 lambda x: x['streamingData']['hlsManifestUrl'],
2123 or url_or_none(try_get(
2124 video_info, lambda x: x['hlsvp'][0], compat_str)))
2127 m3u8_formats = self._extract_m3u8_formats(
2128 manifest_url, video_id, 'mp4', fatal=False)
2129 for a_format in m3u8_formats:
2130 itag = self._search_regex(
2131 r'/itag/(\d+)/', a_format['url'], 'itag', default=None)
2133 a_format['format_id'] = itag
2134 if itag in self._formats:
2135 dct = self._formats[itag].copy()
2136 dct.update(a_format)
2138 a_format['player_url'] = player_url
2139 # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
2140 a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
2141 formats.append(a_format)
2143 error_message = extract_unavailable_message()
2144 if not error_message:
2145 error_message = clean_html(try_get(
2146 player_response, lambda x: x['playabilityStatus']['reason'],
2148 if not error_message:
2149 error_message = clean_html(
2150 try_get(video_info, lambda x: x['reason'][0], compat_str))
2152 raise ExtractorError(error_message, expected=True)
2153 raise ExtractorError('no conn, hlsvp, hlsManifestUrl or url_encoded_fmt_stream_map information found in video info')
2156 video_uploader = try_get(
2157 video_info, lambda x: x['author'][0],
2158 compat_str) or str_or_none(video_details.get('author'))
2160 video_uploader = compat_urllib_parse_unquote_plus(video_uploader)
2162 self._downloader.report_warning('unable to extract uploader name')
2165 video_uploader_id = None
2166 video_uploader_url = None
2168 r'<link itemprop="url" href="(?P<uploader_url>https?://www\.youtube\.com/(?:user|channel)/(?P<uploader_id>[^"]+))">',
2170 if mobj is not None:
2171 video_uploader_id = mobj.group('uploader_id')
2172 video_uploader_url = mobj.group('uploader_url')
2174 self._downloader.report_warning('unable to extract uploader nickname')
2177 str_or_none(video_details.get('channelId'))
2178 or self._html_search_meta(
2179 'channelId', video_webpage, 'channel id', default=None)
2180 or self._search_regex(
2181 r'data-channel-external-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
2182 video_webpage, 'channel id', default=None, group='id'))
2183 channel_url = 'http://www.youtube.com/channel/%s' % channel_id if channel_id else None
2186 # We try first to get a high quality image:
2187 m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
2188 video_webpage, re.DOTALL)
2189 if m_thumb is not None:
2190 video_thumbnail = m_thumb.group(1)
2191 elif 'thumbnail_url' not in video_info:
2192 self._downloader.report_warning('unable to extract video thumbnail')
2193 video_thumbnail = None
2194 else: # don't panic if we can't find it
2195 video_thumbnail = compat_urllib_parse_unquote_plus(video_info['thumbnail_url'][0])
2198 upload_date = self._html_search_meta(
2199 'datePublished', video_webpage, 'upload date', default=None)
2201 upload_date = self._search_regex(
2202 [r'(?s)id="eow-date.*?>(.*?)</span>',
2203 r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'],
2204 video_webpage, 'upload date', default=None)
2205 upload_date = unified_strdate(upload_date)
2207 video_license = self._html_search_regex(
2208 r'<h4[^>]+class="title"[^>]*>\s*License\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li',
2209 video_webpage, 'license', default=None)
2211 m_music = re.search(
2213 <h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*
2221 \bhref=["\']/red[^>]*>| # drop possible
2222 >\s*Listen ad-free with YouTube Red # YouTube Red ad
2229 video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
2230 video_creator = clean_html(m_music.group('creator'))
2232 video_alt_title = video_creator = None
2234 def extract_meta(field):
2235 return self._html_search_regex(
2236 r'<h4[^>]+class="title"[^>]*>\s*%s\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li>\s*' % field,
2237 video_webpage, field, default=None)
2239 track = extract_meta('Song')
2240 artist = extract_meta('Artist')
2241 album = extract_meta('Album')
2243 # Youtube Music Auto-generated description
2244 release_date = release_year = None
2245 if video_description:
2246 mobj = re.search(r'(?s)Provided to YouTube by [^\n]+\n+(?P<track>[^·]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?', video_description)
2249 track = mobj.group('track').strip()
2251 artist = mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·'))
2253 album = mobj.group('album'.strip())
2254 release_year = mobj.group('release_year')
2255 release_date = mobj.group('release_date')
2257 release_date = release_date.replace('-', '')
2258 if not release_year:
2259 release_year = int(release_date[:4])
2261 release_year = int(release_year)
2263 m_episode = re.search(
2264 r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
2267 series = unescapeHTML(m_episode.group('series'))
2268 season_number = int(m_episode.group('season'))
2269 episode_number = int(m_episode.group('episode'))
2271 series = season_number = episode_number = None
2273 m_cat_container = self._search_regex(
2274 r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
2275 video_webpage, 'categories', default=None)
2277 category = self._html_search_regex(
2278 r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
2280 video_categories = None if category is None else [category]
2282 video_categories = None
2285 unescapeHTML(m.group('content'))
2286 for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
2288 def _extract_count(count_name):
2289 return str_to_int(self._search_regex(
2290 r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>'
2291 % re.escape(count_name),
2292 video_webpage, count_name, default=None))
2294 like_count = _extract_count('like')
2295 dislike_count = _extract_count('dislike')
2297 if view_count is None:
2298 view_count = str_to_int(self._search_regex(
2299 r'<[^>]+class=["\']watch-view-count[^>]+>\s*([\d,\s]+)', video_webpage,
2300 'view count', default=None))
2303 float_or_none(video_details.get('averageRating'))
2304 or try_get(video_info, lambda x: float_or_none(x['avg_rating'][0])))
2307 video_subtitles = self.extract_subtitles(video_id, video_webpage)
2308 automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
2310 video_duration = try_get(
2311 video_info, lambda x: int_or_none(x['length_seconds'][0]))
2312 if not video_duration:
2313 video_duration = int_or_none(video_details.get('lengthSeconds'))
2314 if not video_duration:
2315 video_duration = parse_duration(self._html_search_meta(
2316 'duration', video_webpage, 'video duration'))
2319 video_annotations = None
2320 if self._downloader.params.get('writeannotations', False):
2321 xsrf_token = self._search_regex(
2322 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>[A-Za-z0-9+/=]+)\2',
2323 video_webpage, 'xsrf token', group='xsrf_token', fatal=False)
2324 invideo_url = try_get(
2325 player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
2326 if xsrf_token and invideo_url:
2327 xsrf_field_name = self._search_regex(
2328 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
2329 video_webpage, 'xsrf field name',
2330 group='xsrf_field_name', default='session_token')
2331 video_annotations = self._download_webpage(
2332 self._proto_relative_url(invideo_url),
2333 video_id, note='Downloading annotations',
2334 errnote='Unable to download video annotations', fatal=False,
2335 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
2337 chapters = self._extract_chapters(description_original, video_duration)
2339 # Look for the DASH manifest
2340 if self._downloader.params.get('youtube_include_dash_manifest', True):
2341 dash_mpd_fatal = True
2342 for mpd_url in dash_mpds:
2345 def decrypt_sig(mobj):
2347 dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
2348 return '/signature/%s' % dec_s
2350 mpd_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, mpd_url)
2352 for df in self._extract_mpd_formats(
2353 mpd_url, video_id, fatal=dash_mpd_fatal,
2354 formats_dict=self._formats):
2355 if not df.get('filesize'):
2356 df['filesize'] = _extract_filesize(df['url'])
2357 # Do not overwrite DASH format found in some previous DASH manifest
2358 if df['format_id'] not in dash_formats:
2359 dash_formats[df['format_id']] = df
2360 # Additional DASH manifests may end up in HTTP Error 403 therefore
2361 # allow them to fail without bug report message if we already have
2362 # some DASH manifest succeeded. This is temporary workaround to reduce
2363 # burst of bug reports until we figure out the reason and whether it
2364 # can be fixed at all.
2365 dash_mpd_fatal = False
2366 except (ExtractorError, KeyError) as e:
2367 self.report_warning(
2368 'Skipping DASH manifest: %r' % e, video_id)
2370 # Remove the formats we found through non-DASH, they
2371 # contain less info and it can be wrong, because we use
2372 # fixed values (for example the resolution). See
2373 # https://github.com/ytdl-org/youtube-dl/issues/5774 for an
2375 formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
2376 formats.extend(dash_formats.values())
2378 # Check for malformed aspect ratio
2379 stretched_m = re.search(
2380 r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
2383 w = float(stretched_m.group('w'))
2384 h = float(stretched_m.group('h'))
2385 # yt:stretch may hold invalid ratio data (e.g. for Q39EVAstoRM ratio is 17:0).
2386 # We will only process correct ratios.
2390 if f.get('vcodec') != 'none':
2391 f['stretched_ratio'] = ratio
2394 token = extract_token(video_info)
2396 if 'reason' in video_info:
2397 if 'The uploader has not made this video available in your country.' in video_info['reason']:
2398 regions_allowed = self._html_search_meta(
2399 'regionsAllowed', video_webpage, default=None)
2400 countries = regions_allowed.split(',') if regions_allowed else None
2401 self.raise_geo_restricted(
2402 msg=video_info['reason'][0], countries=countries)
2403 reason = video_info['reason'][0]
2404 if 'Invalid parameters' in reason:
2405 unavailable_message = extract_unavailable_message()
2406 if unavailable_message:
2407 reason = unavailable_message
2408 raise ExtractorError(
2409 'YouTube said: %s' % reason,
2410 expected=True, video_id=video_id)
2412 raise ExtractorError(
2413 '"token" parameter not in video info for unknown reason',
2416 if not formats and (video_info.get('license_info') or try_get(player_response, lambda x: x['streamingData']['licenseInfos'])):
2417 raise ExtractorError('This video is DRM protected.', expected=True)
2419 self._sort_formats(formats)
2421 self.mark_watched(video_id, video_info, player_response)
2425 'uploader': video_uploader,
2426 'uploader_id': video_uploader_id,
2427 'uploader_url': video_uploader_url,
2428 'channel_id': channel_id,
2429 'channel_url': channel_url,
2430 'upload_date': upload_date,
2431 'license': video_license,
2432 'creator': video_creator or artist,
2433 'title': video_title,
2434 'alt_title': video_alt_title or track,
2435 'thumbnail': video_thumbnail,
2436 'description': video_description,
2437 'categories': video_categories,
2439 'subtitles': video_subtitles,
2440 'automatic_captions': automatic_captions,
2441 'duration': video_duration,
2442 'age_limit': 18 if age_gate else 0,
2443 'annotations': video_annotations,
2444 'chapters': chapters,
2445 'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
2446 'view_count': view_count,
2447 'like_count': like_count,
2448 'dislike_count': dislike_count,
2449 'average_rating': average_rating,
2452 'start_time': start_time,
2453 'end_time': end_time,
2455 'season_number': season_number,
2456 'episode_number': episode_number,
2460 'release_date': release_date,
2461 'release_year': release_year,
2465 class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
2466 IE_DESC = 'YouTube.com playlists'
2467 _VALID_URL = r"""(?x)(?:
2472 youtube(?:kids)?\.com|
2477 (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/(?:videoseries|[0-9A-Za-z_-]{11}))
2478 \? (?:.*?[&;])*? (?:p|a|list)=
2481 youtu\.be/[0-9A-Za-z_-]{11}\?.*?\blist=
2484 (?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)?[0-9A-Za-z-_]{10,}
2485 # Top tracks, they can also include dots
2491 )""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
2492 _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
2493 _VIDEO_RE_TPL = r'href="\s*/watch\?v=%s(?:&(?:[^"]*?index=(?P<index>\d+))?(?:[^>]+>(?P<title>[^<]+))?)?'
2494 _VIDEO_RE = _VIDEO_RE_TPL % r'(?P<id>[0-9A-Za-z_-]{11})'
2495 IE_NAME = 'youtube:playlist'
2497 'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
2499 'title': 'ytdl test PL',
2500 'id': 'PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
2502 'playlist_count': 3,
2504 'url': 'https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
2506 'id': 'PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
2507 'title': 'YDL_Empty_List',
2509 'playlist_count': 0,
2510 'skip': 'This playlist is private',
2512 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
2513 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2515 'title': '29C3: Not my department',
2516 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2517 'uploader': 'Christiaan008',
2518 'uploader_id': 'ChRiStIaAn008',
2520 'playlist_count': 95,
2522 'note': 'issue #673',
2523 'url': 'PLBB231211A4F62143',
2525 'title': '[OLD]Team Fortress 2 (Class-based LP)',
2526 'id': 'PLBB231211A4F62143',
2527 'uploader': 'Wickydoo',
2528 'uploader_id': 'Wickydoo',
2530 'playlist_mincount': 26,
2532 'note': 'Large playlist',
2533 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
2535 'title': 'Uploads from Cauchemar',
2536 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
2537 'uploader': 'Cauchemar',
2538 'uploader_id': 'Cauchemar89',
2540 'playlist_mincount': 799,
2542 'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
2544 'title': 'YDL_safe_search',
2545 'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
2547 'playlist_count': 2,
2548 'skip': 'This playlist is private',
2551 'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
2552 'playlist_count': 4,
2555 'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
2556 'uploader': 'milan',
2557 'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
2560 'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
2561 'playlist_mincount': 485,
2563 'title': '2018 Chinese New Singles (11/6 updated)',
2564 'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
2566 'uploader_id': 'sdragonfang',
2569 'note': 'Embedded SWF player',
2570 'url': 'https://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
2571 'playlist_count': 4,
2574 'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ',
2576 'skip': 'This playlist does not exist',
2578 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
2579 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
2581 'title': 'Uploads from Interstellar Movie',
2582 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
2583 'uploader': 'Interstellar Movie',
2584 'uploader_id': 'InterstellarMovie1',
2586 'playlist_mincount': 21,
2588 # Playlist URL that does not actually serve a playlist
2589 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
2591 'id': 'FqZTN594JQw',
2593 'title': "Smiley's People 01 detective, Adventure Series, Action",
2594 'uploader': 'STREEM',
2595 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
2596 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
2597 'upload_date': '20150526',
2598 'license': 'Standard YouTube License',
2599 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
2600 'categories': ['People & Blogs'],
2604 'dislike_count': int,
2607 'skip_download': True,
2609 'skip': 'This video is not available.',
2610 'add_ie': [YoutubeIE.ie_key()],
2612 'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
2614 'id': 'yeWKywCrFtk',
2616 'title': 'Small Scale Baler and Braiding Rugs',
2617 'uploader': 'Backus-Page House Museum',
2618 'uploader_id': 'backuspagemuseum',
2619 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
2620 'upload_date': '20161008',
2621 'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
2622 'categories': ['Nonprofits & Activism'],
2625 'dislike_count': int,
2629 'skip_download': True,
2632 # https://github.com/ytdl-org/youtube-dl/issues/21844
2633 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2635 'title': 'Data Analysis with Dr Mike Pound',
2636 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2637 'uploader_id': 'Computerphile',
2638 'uploader': 'Computerphile',
2640 'playlist_mincount': 11,
2642 'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
2643 'only_matching': True,
2645 'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
2646 'only_matching': True,
2648 # music album playlist
2649 'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
2650 'only_matching': True,
2652 'url': 'https://invidio.us/playlist?list=PLDIoUOhQQPlXr63I_vwF9GD8sAKh77dWU',
2653 'only_matching': True,
2655 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
2656 'only_matching': True,
2659 def _real_initialize(self):
2662 def extract_videos_from_page(self, page):
2666 for item in re.findall(
2667 r'(<[^>]*\bdata-video-id\s*=\s*["\'][0-9A-Za-z_-]{11}[^>]+>)', page):
2668 attrs = extract_attributes(item)
2669 video_id = attrs['data-video-id']
2670 video_title = unescapeHTML(attrs.get('data-title'))
2672 video_title = video_title.strip()
2673 ids_in_page.append(video_id)
2674 titles_in_page.append(video_title)
2676 # Fallback with old _VIDEO_RE
2677 self.extract_videos_from_page_impl(
2678 self._VIDEO_RE, page, ids_in_page, titles_in_page)
2681 self.extract_videos_from_page_impl(
2682 r'href="\s*/watch\?v\s*=\s*(?P<id>[0-9A-Za-z_-]{11})', page,
2683 ids_in_page, titles_in_page)
2684 self.extract_videos_from_page_impl(
2685 r'data-video-ids\s*=\s*["\'](?P<id>[0-9A-Za-z_-]{11})', page,
2686 ids_in_page, titles_in_page)
2688 return zip(ids_in_page, titles_in_page)
2690 def _extract_mix(self, playlist_id):
2691 # The mixes are generated from a single video
2692 # the id of the playlist is just 'RD' + video_id
2694 last_id = playlist_id[-11:]
2695 for n in itertools.count(1):
2696 url = 'https://youtube.com/watch?v=%s&list=%s' % (last_id, playlist_id)
2697 webpage = self._download_webpage(
2698 url, playlist_id, 'Downloading page {0} of Youtube mix'.format(n))
2699 new_ids = orderedSet(re.findall(
2700 r'''(?xs)data-video-username=".*?".*?
2701 href="/watch\?v=([0-9A-Za-z_-]{11})&[^"]*?list=%s''' % re.escape(playlist_id),
2703 # Fetch new pages until all the videos are repeated, it seems that
2704 # there are always 51 unique videos.
2705 new_ids = [_id for _id in new_ids if _id not in ids]
2711 url_results = self._ids_to_results(ids)
2713 search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage)
2715 search_title('playlist-title')
2716 or search_title('title long-title')
2717 or search_title('title'))
2718 title = clean_html(title_span)
2720 return self.playlist_result(url_results, playlist_id, title)
2722 def _extract_playlist(self, playlist_id):
2723 url = self._TEMPLATE_URL % playlist_id
2724 page = self._download_webpage(url, playlist_id)
2726 # the yt-alert-message now has tabindex attribute (see https://github.com/ytdl-org/youtube-dl/issues/11604)
2727 for match in re.findall(r'<div class="yt-alert-message"[^>]*>([^<]+)</div>', page):
2728 match = match.strip()
2729 # Check if the playlist exists or is private
2730 mobj = re.match(r'[^<]*(?:The|This) playlist (?P<reason>does not exist|is private)[^<]*', match)
2732 reason = mobj.group('reason')
2733 message = 'This playlist %s' % reason
2734 if 'private' in reason:
2735 message += ', use --username or --netrc to access it'
2737 raise ExtractorError(message, expected=True)
2738 elif re.match(r'[^<]*Invalid parameters[^<]*', match):
2739 raise ExtractorError(
2740 'Invalid parameters. Maybe URL is incorrect.',
2742 elif re.match(r'[^<]*Choose your language[^<]*', match):
2745 self.report_warning('Youtube gives an alert message: ' + match)
2747 playlist_title = self._html_search_regex(
2748 r'(?s)<h1 class="pl-header-title[^"]*"[^>]*>\s*(.*?)\s*</h1>',
2749 page, 'title', default=None)
2751 _UPLOADER_BASE = r'class=["\']pl-header-details[^>]+>\s*<li>\s*<a[^>]+\bhref='
2752 uploader = self._html_search_regex(
2753 r'%s["\']/(?:user|channel)/[^>]+>([^<]+)' % _UPLOADER_BASE,
2754 page, 'uploader', default=None)
2756 r'%s(["\'])(?P<path>/(?:user|channel)/(?P<uploader_id>.+?))\1' % _UPLOADER_BASE,
2759 uploader_id = mobj.group('uploader_id')
2760 uploader_url = compat_urlparse.urljoin(url, mobj.group('path'))
2762 uploader_id = uploader_url = None
2766 if not playlist_title:
2768 # Some playlist URLs don't actually serve a playlist (e.g.
2769 # https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4)
2770 next(self._entries(page, playlist_id))
2771 except StopIteration:
2774 playlist = self.playlist_result(
2775 self._entries(page, playlist_id), playlist_id, playlist_title)
2777 'uploader': uploader,
2778 'uploader_id': uploader_id,
2779 'uploader_url': uploader_url,
2782 return has_videos, playlist
2784 def _check_download_just_video(self, url, playlist_id):
2785 # Check if it's a video-specific URL
2786 query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
2787 video_id = query_dict.get('v', [None])[0] or self._search_regex(
2788 r'(?:(?:^|//)youtu\.be/|youtube\.com/embed/(?!videoseries))([0-9A-Za-z_-]{11})', url,
2789 'video id', default=None)
2791 if self._downloader.params.get('noplaylist'):
2792 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
2793 return video_id, self.url_result(video_id, 'Youtube', video_id=video_id)
2795 self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
2796 return video_id, None
2799 def _real_extract(self, url):
2800 # Extract playlist id
2801 mobj = re.match(self._VALID_URL, url)
2803 raise ExtractorError('Invalid URL: %s' % url)
2804 playlist_id = mobj.group(1) or mobj.group(2)
2806 video_id, video = self._check_download_just_video(url, playlist_id)
2810 if playlist_id.startswith(('RD', 'UL', 'PU')):
2811 # Mixes require a custom extraction process
2812 return self._extract_mix(playlist_id)
2814 has_videos, playlist = self._extract_playlist(playlist_id)
2815 if has_videos or not video_id:
2818 # Some playlist URLs don't actually serve a playlist (see
2819 # https://github.com/ytdl-org/youtube-dl/issues/10537).
2820 # Fallback to plain video extraction if there is a video id
2821 # along with playlist id.
2822 return self.url_result(video_id, 'Youtube', video_id=video_id)
2825 class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
2826 IE_DESC = 'YouTube.com channels'
2827 _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie|kids)?\.com|(?:www\.)?invidio\.us)/channel/(?P<id>[0-9A-Za-z_-]+)'
2828 _TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos'
2829 _VIDEO_RE = r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?'
2830 IE_NAME = 'youtube:channel'
2832 'note': 'paginated channel',
2833 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
2834 'playlist_mincount': 91,
2836 'id': 'UUKfVa3S1e4PHvxWcwyMMg8w',
2837 'title': 'Uploads from lex will',
2838 'uploader': 'lex will',
2839 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2842 'note': 'Age restricted channel',
2843 # from https://www.youtube.com/user/DeusExOfficial
2844 'url': 'https://www.youtube.com/channel/UCs0ifCMCm1icqRbqhUINa0w',
2845 'playlist_mincount': 64,
2847 'id': 'UUs0ifCMCm1icqRbqhUINa0w',
2848 'title': 'Uploads from Deus Ex',
2849 'uploader': 'Deus Ex',
2850 'uploader_id': 'DeusExOfficial',
2853 'url': 'https://invidio.us/channel/UC23qupoDRn9YOAVzeoxjOQA',
2854 'only_matching': True,
2856 'url': 'https://www.youtubekids.com/channel/UCyu8StPfZWapR6rfW_JgqcA',
2857 'only_matching': True,
2861 def suitable(cls, url):
2862 return (False if YoutubePlaylistsIE.suitable(url) or YoutubeLiveIE.suitable(url)
2863 else super(YoutubeChannelIE, cls).suitable(url))
2865 def _build_template_url(self, url, channel_id):
2866 return self._TEMPLATE_URL % channel_id
    def _real_extract(self, url):
        """Extract a channel's videos.

        Preferred path: resolve the channel to its 'UU…' uploads playlist and
        delegate to the playlist extractor (avoids the page-count cap below).
        Fallback: page through the channel's /videos listing directly.
        """
        channel_id = self._match_id(url)

        url = self._build_template_url(url, channel_id)

        # Channel by page listing is restricted to 35 pages of 30 items, i.e. 1050 videos total (see #5778)
        # Workaround by extracting as a playlist if managed to obtain channel playlist URL
        # otherwise fallback on channel by page extraction
        channel_page = self._download_webpage(
            url + '?view=57', channel_id,
            'Downloading channel page', fatal=False)
        if channel_page is False:
            channel_playlist_id = False
        else:
            channel_playlist_id = self._html_search_meta(
                'channelId', channel_page, 'channel id', default=None)
            if not channel_playlist_id:
                # Fall back to the mobile-app deeplink meta tags, which embed
                # the channel id in a vnd.youtube:// URL.
                channel_url = self._html_search_meta(
                    ('al:ios:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad'),
                    channel_page, 'channel url', default=None)
                if channel_url:
                    channel_playlist_id = self._search_regex(
                        r'vnd\.youtube://user/([0-9A-Za-z_-]+)',
                        channel_url, 'channel id', default=None)
        if channel_playlist_id and channel_playlist_id.startswith('UC'):
            # A 'UC…' channel id maps to a 'UU…' uploads-playlist id.
            playlist_id = 'UU' + channel_playlist_id[2:]
            return self.url_result(
                compat_urlparse.urljoin(url, '/playlist?list=%s' % playlist_id), 'YoutubePlaylist')

        channel_page = self._download_webpage(url, channel_id, 'Downloading page #1')
        autogenerated = re.search(r'''(?x)
                class="[^"]*?(?:
                    channel-header-autogenerated-label|
                    yt-channel-title-autogenerated
                )[^"]*"''', channel_page) is not None

        if autogenerated:
            # The videos are contained in a single page
            # the ajax pages can't be used, they are empty
            entries = [
                self.url_result(
                    video_id, 'Youtube', video_id=video_id,
                    video_title=video_title)
                for video_id, video_title in self.extract_videos_from_page(channel_page)]
            return self.playlist_result(entries, channel_id)

        try:
            # Probe the generator: an immediately-empty channel page may carry
            # an alert (e.g. geo restriction) worth surfacing to the user.
            next(self._entries(channel_page, channel_id))
        except StopIteration:
            alert_message = self._html_search_regex(
                r'(?s)<div[^>]+class=(["\']).*?\byt-alert-message\b.*?\1[^>]*>(?P<alert>[^<]+)</div>',
                channel_page, 'alert', default=None, group='alert')
            if alert_message:
                raise ExtractorError('Youtube said: %s' % alert_message, expected=True)

        return self.playlist_result(self._entries(channel_page, channel_id), channel_id)
class YoutubeUserIE(YoutubeChannelIE):
    """Extract a user's (or /c/ custom) videos page; reuses the channel logic."""
    IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:(?P<user>user|c)/)?(?!(?:attribution_link|watch|results|shared)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/%s/%s/videos'
    IE_NAME = 'youtube:user'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheLinuxFoundation',
        'playlist_mincount': 320,
        'info_dict': {
            'id': 'UUfX55Sx5hEFjoC3cNs6mCUQ',
            'title': 'Uploads from The Linux Foundation',
            'uploader': 'The Linux Foundation',
            'uploader_id': 'TheLinuxFoundation',
        }
    }, {
        # Only available via https://www.youtube.com/c/12minuteathlete/videos
        # but not https://www.youtube.com/user/12minuteathlete/videos
        'url': 'https://www.youtube.com/c/12minuteathlete/videos',
        'playlist_mincount': 249,
        'info_dict': {
            'id': 'UUVjM-zV6_opMDx7WYxnjZiQ',
            'title': 'Uploads from 12 Minute Athlete',
            'uploader': '12 Minute Athlete',
            'uploader_id': 'the12minuteathlete',
        }
    }, {
        'url': 'ytuser:phihag',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/gametrailers',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/gametrailers',
        'only_matching': True,
    }, {
        # This channel is not available, geo restricted to JP
        'url': 'https://www.youtube.com/user/kananishinoSMEJ/videos',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        # Don't return True if the url can be extracted with another youtube
        # extractor: the regex is too permissive and it would match otherwise.
        other_yt_ies = iter(klass for (name, klass) in globals().items() if name.startswith('Youtube') and name.endswith('IE') and klass is not cls)
        if any(ie.suitable(url) for ie in other_yt_ies):
            return False
        else:
            return super(YoutubeUserIE, cls).suitable(url)

    def _build_template_url(self, url, channel_id):
        # Unlike the channel extractor, the path kind (user/c) comes from the
        # original URL, defaulting to 'user' for the ytuser: shorthand.
        mobj = re.match(self._VALID_URL, url)
        return self._TEMPLATE_URL % (mobj.group('user') or 'user', mobj.group('id'))
class YoutubeLiveIE(YoutubeBaseInfoExtractor):
    IE_DESC = 'YouTube.com live streams'
    _VALID_URL = r'(?P<base_url>https?://(?:\w+\.)?youtube\.com/(?:(?:user|channel|c)/)?(?P<id>[^/]+))/live'
    IE_NAME = 'youtube:live'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheYoungTurks/live',
        'info_dict': {
            'id': 'a48o2S1cPoo',
            'ext': 'mp4',
            'title': 'The Young Turks - Live Main Show',
            'uploader': 'The Young Turks',
            'uploader_id': 'TheYoungTurks',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
            'upload_date': '20150715',
            'license': 'Standard YouTube License',
            'description': 'md5:438179573adcdff3c97ebb1ee632b891',
            'categories': ['News & Politics'],
            'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/TheYoungTurks/live',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Resolve a /live URL to the current live video when one can be
        identified on the page; otherwise fall back to the base channel/user
        URL so another extractor can handle it."""
        mobj = re.match(self._VALID_URL, url)
        channel_id = mobj.group('id')
        base_url = mobj.group('base_url')
        webpage = self._download_webpage(url, channel_id, fatal=False)
        if webpage:
            page_type = self._og_search_property(
                'type', webpage, 'page type', default='')
            video_id = self._html_search_meta(
                'videoId', webpage, 'video id', default=None)
            # Only trust the meta videoId when the page declares itself a
            # video page and the id looks like a well-formed 11-char video id.
            if page_type.startswith('video') and video_id and re.match(
                    r'^[0-9A-Za-z_-]{11}$', video_id):
                return self.url_result(video_id, YoutubeIE.ie_key())
        return self.url_result(base_url)
class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
    """Extract all playlists of a user/channel (/playlists tab); the actual
    paging logic lives in YoutubePlaylistsBaseInfoExtractor."""
    IE_DESC = 'YouTube.com user/channel playlists'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/(?:user|channel)/(?P<id>[^/]+)/playlists'
    IE_NAME = 'youtube:playlists'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
        'playlist_mincount': 4,
        'info_dict': {
            'id': 'ThirstForScience',
            'title': 'ThirstForScience',
        },
    }, {
        # with "Load more" button
        'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
        'playlist_mincount': 70,
        'info_dict': {
            'id': 'igorkle1',
            'title': 'Игорь Клейнер',
        },
    }, {
        'url': 'https://www.youtube.com/channel/UCiU1dHvZObB2iP6xkJ__Icw/playlists',
        'playlist_mincount': 17,
        'info_dict': {
            'id': 'UCiU1dHvZObB2iP6xkJ__Icw',
            'title': 'Chem Player',
        },
    }]
class YoutubeSearchBaseInfoExtractor(YoutubePlaylistBaseInfoExtractor):
    # Watch-link pattern used by the search extractors below: group 'id' is
    # the 11-character video id; group 'title' (optional) captures the link's
    # title="..." attribute when present.
    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})(?:[^"]*"[^>]+\btitle="(?P<title>[^"]+))?'
class YoutubeSearchIE(SearchInfoExtractor, YoutubeSearchBaseInfoExtractor):
    IE_DESC = 'YouTube.com searches'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    _EXTRA_QUERY_ARGS = {}
    _TESTS = []

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""

        videos = []
        limit = n

        url_query = {
            'search_query': query.encode('utf-8'),
        }
        # Subclasses (e.g. ytsearchdate) contribute extra query parameters.
        url_query.update(self._EXTRA_QUERY_ARGS)
        result_url = 'https://www.youtube.com/results?' + compat_urllib_parse_urlencode(url_query)

        for pagenum in itertools.count(1):
            # With spf=navigate the endpoint answers with JSON; the rendered
            # results HTML sits at [1]['body']['content'].
            data = self._download_json(
                result_url, video_id='query "%s"' % query,
                note='Downloading page %s' % pagenum,
                errnote='Unable to download API page',
                query={'spf': 'navigate'})
            html_content = data[1]['body']['content']

            if 'class="search-message' in html_content:
                raise ExtractorError(
                    '[youtube] No video results', expected=True)

            new_videos = list(self._process_page(html_content))
            videos += new_videos
            # Stop once a page yields nothing new or enough results were found.
            if not new_videos or len(videos) > limit:
                break
            # Follow the "Next" pagination button if present.
            next_link = self._html_search_regex(
                r'href="(/results\?[^"]*\bsp=[^"]+)"[^>]*>\s*<span[^>]+class="[^"]*\byt-uix-button-content\b[^"]*"[^>]*>Next',
                html_content, 'next link', default=None)
            if next_link is None:
                break
            result_url = compat_urlparse.urljoin('https://www.youtube.com/', next_link)

        if len(videos) > n:
            videos = videos[:n]
        return self.playlist_result(videos, query)
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Search extractor that returns the newest results first ("ytsearchdate")."""
    _SEARCH_KEY = 'ytsearchdate'
    _EXTRA_QUERY_ARGS = {'search_sort': 'video_date_uploaded'}
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    IE_DESC = 'YouTube.com searches, newest videos first'
class YoutubeSearchURLIE(YoutubeSearchBaseInfoExtractor):
    IE_DESC = 'YouTube.com search URLs'
    IE_NAME = 'youtube:search_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Extract the first results page of a literal /results?… search URL."""
        mobj = re.match(self._VALID_URL, url)
        # The query is percent/plus-encoded in the URL; decode it for use as
        # the playlist title and download note.
        query = compat_urllib_parse_unquote_plus(mobj.group('query'))
        webpage = self._download_webpage(url, query)
        return self.playlist_result(self._process_page(webpage), playlist_title=query)
class YoutubeShowIE(YoutubePlaylistsBaseInfoExtractor):
    IE_DESC = 'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/show/(?P<id>[^?#]*)'
    IE_NAME = 'youtube:show'
    _TESTS = [{
        'url': 'https://www.youtube.com/show/airdisasters',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'airdisasters',
            'title': 'Air Disasters',
        }
    }]

    def _real_extract(self, url):
        # A show is just a collection of playlists: rewrite the URL to the
        # show's /playlists page and let the playlists base class extract it.
        playlist_id = self._match_id(url)
        return super(YoutubeShowIE, self)._real_extract(
            'https://www.youtube.com/show/%s/playlists' % playlist_id)
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
    """
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        # e.g. 'youtube:recommended' — derived from the subclass feed name.
        return 'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        self._login()

    def _entries(self, page):
        """Yield url_result entries for the feed, following 'load more' pages."""
        # The extraction process is the same as for playlists, but the regex
        # for the video ids doesn't contain an index
        ids = []
        more_widget_html = content_html = page
        for page_num in itertools.count(1):
            matches = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)

            # 'recommended' feed has infinite 'load more' and each new portion spins
            # the same videos in (sometimes) slightly different order, so we'll check
            # for unicity and break when portion has no new videos
            new_ids = list(filter(lambda video_id: video_id not in ids, orderedSet(matches)))
            if not new_ids:
                break

            ids.extend(new_ids)

            for entry in self._ids_to_results(new_ids):
                yield entry

            # The 'load more' widget carries the URL of the next AJAX portion.
            mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
            if not mobj:
                break

            more = self._download_json(
                'https://youtube.com/%s' % mobj.group('more'), self._PLAYLIST_TITLE,
                'Downloading page #%s' % page_num,
                transform_source=uppercase_escape)
            content_html = more['content_html']
            more_widget_html = more['load_more_widget_html']

    def _real_extract(self, url):
        # The URL itself is ignored; the feed page is addressed by _FEED_NAME.
        page = self._download_webpage(
            'https://www.youtube.com/feed/%s' % self._FEED_NAME,
            self._PLAYLIST_TITLE)
        return self.playlist_result(
            self._entries(page), playlist_title=self._PLAYLIST_TITLE)
class YoutubeWatchLaterIE(YoutubePlaylistIE):
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:feed/watch_later|(?:playlist|watch)\?(?:.+&)?list=WL)|:ytwatchlater'

    _TESTS = [{
        'url': 'https://www.youtube.com/playlist?list=WL',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?v=bCNU9TrbiRk&index=1&list=WL',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Reuse the playlist base helpers: first check whether the URL should
        # be handled as a single video; otherwise extract the special 'WL'
        # (watch later) playlist itself.
        _, video = self._check_download_just_video(url, 'WL')
        if video:
            return video
        _, playlist = self._extract_playlist('WL')
        return playlist
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Extract the logged-in user's favourites (":ytfav")."""
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
    _LOGIN_REQUIRED = True

    def _real_extract(self, url):
        # The favourites feed is an ordinary playlist under the hood: find its
        # id on the page and delegate to the playlist extractor.
        page = self._download_webpage(
            'https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        favourites_id = self._search_regex(
            r'list=(.+?)["&]', page, 'favourites playlist id')
        return self.url_result(favourites_id, 'YoutubePlaylist')
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Personal recommendations feed (":ytrec"); login is enforced by the base class."""
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = 'Youtube Recommended videos'
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/recommended|:ytrec(?:ommended)?'
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Subscriptions feed ("ytsubs"); login is enforced by the base class."""
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = 'Youtube Subscriptions'
    IE_DESC = 'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Watch-history feed (":ythistory"); login is enforced by the base class."""
    _FEED_NAME = 'history'
    _PLAYLIST_TITLE = 'Youtube History'
    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/history|:ythistory'
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch watch/attribution URLs that lack a video id.

    Such URLs usually result from an unquoted '&' in the shell, so extraction
    always fails with an explanatory error instead of a confusing one.
    """
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Nothing can be extracted from a truncated URL — explain the most
        # likely cause to the user instead.
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply youtube-dl BaW_jenozKc .',
            expected=True)
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch watch URLs whose video id is shorter than the required 11 chars."""
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]
3333 def _real_extract(self, url):
3334 video_id = self._match_id(url)
3335 raise ExtractorError(
3336 'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url),