3 from __future__ import unicode_literals
14 from .common import InfoExtractor, SearchInfoExtractor
15 from ..jsinterp import JSInterpreter
16 from ..swfinterp import SWFInterpreter
17 from ..compat import (
22 compat_urllib_parse_unquote,
23 compat_urllib_parse_unquote_plus,
24 compat_urllib_parse_urlencode,
25 compat_urllib_parse_urlparse,
36 get_element_by_attribute,
58 class YoutubeBaseInfoExtractor(InfoExtractor):
59 """Provide base functions for Youtube extractors"""
# Google account endpoints. In the login code visible further down this
# file, _LOGIN_URL is fetched as the login page ('Downloading login page'),
# _LOOKUP_URL receives the account lookup request, _CHALLENGE_URL receives
# the 'Logging in' challenge request, and _TFA_URL receives the two-factor
# code; its {0} placeholder is filled with the "TL" value taken from the
# challenge response.
60 _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
# NOTE(review): no use of _TWOFACTOR_URL is visible in this part of the
# file - confirm whether it is still referenced anywhere.
61 _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'
63 _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
64 _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
65 _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'
# NOTE(review): presumably the machine name looked up in .netrc for
# credentials - its consumer is not visible here, confirm.
67 _NETRC_MACHINE = 'youtube'
68 # If True it will raise an error if no login info is provided
# (the visible check additionally requires that no 'cookiefile' param is
# set before raising ExtractorError).
69 _LOGIN_REQUIRED = False
# Pattern for playlist IDs: one of the listed prefixes followed by at least
# ten ID characters. It is interpolated into YoutubeIE._VALID_URL through
# the %(playlist_id)s placeholder.
71 _PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}'
73 def _set_language(self):
75 '.youtube.com', 'PREF', 'f1=50000000&hl=en',
76 # YouTube sets the expire time to about two months
77 expire_time=time.time() + 2 * 30 * 24 * 3600)
79 def _ids_to_results(self, ids):
81 self.url_result(vid_id, 'Youtube', video_id=vid_id)
86 Attempt to log in to YouTube.
87 True is returned if successful or skipped.
88 False is returned if login failed.
90 If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
92 username, password = self._get_login_info()
93 # No authentication to be performed
95 if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
96 raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
99 login_page = self._download_webpage(
100 self._LOGIN_URL, None,
101 note='Downloading login page',
102 errnote='unable to fetch login page', fatal=False)
103 if login_page is False:
106 login_form = self._hidden_inputs(login_page)
108 def req(url, f_req, note, errnote):
109 data = login_form.copy()
112 'checkConnection': 'youtube',
113 'checkedDomains': 'youtube',
115 'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
116 'f.req': json.dumps(f_req),
117 'flowName': 'GlifWebSignIn',
118 'flowEntry': 'ServiceLogin',
119 # TODO: reverse actual botguard identifier generation algo
120 'bgRequest': '["identifier",""]',
122 return self._download_json(
123 url, None, note=note, errnote=errnote,
124 transform_source=lambda s: re.sub(r'^[^[]*', '', s),
126 data=urlencode_postdata(data), headers={
127 'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
128 'Google-Accounts-XSRF': 1,
132 self._downloader.report_warning(message)
136 None, [], None, 'US', None, None, 2, False, True,
140 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
142 1, [None, None, []], None, None, None, True
147 lookup_results = req(
148 self._LOOKUP_URL, lookup_req,
149 'Looking up account info', 'Unable to look up account info')
151 if lookup_results is False:
154 user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
156 warn('Unable to extract user hash')
161 None, 1, None, [1, None, None, None, [password, None, True]],
163 None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
164 1, [None, None, []], None, None, None, True
167 challenge_results = req(
168 self._CHALLENGE_URL, challenge_req,
169 'Logging in', 'Unable to log in')
171 if challenge_results is False:
174 login_res = try_get(challenge_results, lambda x: x[0][5], list)
176 login_msg = try_get(login_res, lambda x: x[5], compat_str)
178 'Unable to login: %s' % 'Invalid password'
179 if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
182 res = try_get(challenge_results, lambda x: x[0][-1], list)
184 warn('Unable to extract result entry')
187 login_challenge = try_get(res, lambda x: x[0][0], list)
189 challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
190 if challenge_str == 'TWO_STEP_VERIFICATION':
191 # SEND_SUCCESS - TFA code has been successfully sent to phone
192 # QUOTA_EXCEEDED - reached the limit of TFA codes
193 status = try_get(login_challenge, lambda x: x[5], compat_str)
194 if status == 'QUOTA_EXCEEDED':
195 warn('Exceeded the limit of TFA codes, try later')
198 tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
200 warn('Unable to extract TL')
203 tfa_code = self._get_tfa_info('2-step verification code')
207 'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
208 '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
211 tfa_code = remove_start(tfa_code, 'G-')
214 user_hash, None, 2, None,
216 9, None, None, None, None, None, None, None,
217 [None, tfa_code, True, 2]
221 self._TFA_URL.format(tl), tfa_req,
222 'Submitting TFA code', 'Unable to submit TFA code')
224 if tfa_results is False:
227 tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
229 tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
231 'Unable to finish TFA: %s' % 'Invalid TFA code'
232 if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
235 check_cookie_url = try_get(
236 tfa_results, lambda x: x[0][-1][2], compat_str)
239 'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
240 'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
241 'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
243 challenge = CHALLENGES.get(
245 '%s returned error %s.' % (self.IE_NAME, challenge_str))
246 warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
249 check_cookie_url = try_get(res, lambda x: x[2], compat_str)
251 if not check_cookie_url:
252 warn('Unable to extract CheckCookie URL')
255 check_cookie_results = self._download_webpage(
256 check_cookie_url, None, 'Checking cookie', fatal=False)
258 if check_cookie_results is False:
261 if 'https://myaccount.google.com/' not in check_cookie_results:
262 warn('Unable to log in')
def _download_webpage_handle(self, *args, **kwargs):
    """Download a webpage with ``disable_polymer=true`` added to the query.

    All positional and keyword arguments are forwarded to the parent
    implementation. The only change is that the ``query`` keyword argument
    is replaced by a copy that also carries ``disable_polymer='true'``, so
    the mapping supplied by the caller is never mutated. A missing or
    ``None`` query is treated as an empty one.

    Returns whatever the parent ``_download_webpage_handle`` returns.
    """
    # ``or {}`` also covers an explicit ``query=None``, which would
    # otherwise blow up on ``None.copy()``.
    query = (kwargs.get('query') or {}).copy()
    query['disable_polymer'] = 'true'
    kwargs['query'] = query
    # Two-argument super() and compat_kwargs keep this Python 2 compatible.
    return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
        *args, **compat_kwargs(kwargs))
274 def _real_initialize(self):
275 if self._downloader is None:
278 if not self._login():
282 class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
283 # Extract entries from page with "Load more" button
284 def _entries(self, page, playlist_id):
285 more_widget_html = content_html = page
286 for page_num in itertools.count(1):
287 for entry in self._process_page(content_html):
290 mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
296 while count <= retries:
298 # Downloading page may result in intermittent 5xx HTTP error
299 # that is usually worked around with a retry
300 more = self._download_json(
301 'https://youtube.com/%s' % mobj.group('more'), playlist_id,
302 'Downloading page #%s%s'
303 % (page_num, ' (retry #%d)' % count if count else ''),
304 transform_source=uppercase_escape)
306 except ExtractorError as e:
307 if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503):
313 content_html = more['content_html']
314 if not content_html.strip():
315 # Some webpages show a "Load more" button but they don't
318 more_widget_html = more['load_more_widget_html']
321 class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
322 def _process_page(self, content):
323 for video_id, video_title in self.extract_videos_from_page(content):
324 yield self.url_result(video_id, 'Youtube', video_id, video_title)
326 def extract_videos_from_page_impl(self, video_re, page, ids_in_page, titles_in_page):
327 for mobj in re.finditer(video_re, page):
328 # The link with index 0 is not the first video of the playlist (not sure if still actual)
329 if 'index' in mobj.groupdict() and mobj.group('id') == '0':
331 video_id = mobj.group('id')
332 video_title = unescapeHTML(
333 mobj.group('title')) if 'title' in mobj.groupdict() else None
335 video_title = video_title.strip()
336 if video_title == '► Play all':
339 idx = ids_in_page.index(video_id)
340 if video_title and not titles_in_page[idx]:
341 titles_in_page[idx] = video_title
343 ids_in_page.append(video_id)
344 titles_in_page.append(video_title)
346 def extract_videos_from_page(self, page):
349 self.extract_videos_from_page_impl(
350 self._VIDEO_RE, page, ids_in_page, titles_in_page)
351 return zip(ids_in_page, titles_in_page)
354 class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
355 def _process_page(self, content):
356 for playlist_id in orderedSet(re.findall(
357 r'<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*><a[^>]+href="/?playlist\?list=([0-9A-Za-z-_]{10,})"',
359 yield self.url_result(
360 'https://www.youtube.com/playlist?list=%s' % playlist_id, 'YoutubePlaylist')
362 def _real_extract(self, url):
363 playlist_id = self._match_id(url)
364 webpage = self._download_webpage(url, playlist_id)
365 title = self._og_search_title(webpage, fatal=False)
366 return self.playlist_result(self._entries(webpage, playlist_id), playlist_id, title)
369 class YoutubeIE(YoutubeBaseInfoExtractor):
370 IE_DESC = 'YouTube.com'
371 _VALID_URL = r"""(?x)^
373 (?:https?://|//) # http(s):// or protocol-independent URL
374 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com/|
375 (?:www\.)?deturl\.com/www\.youtube\.com/|
376 (?:www\.)?pwnyoutube\.com/|
377 (?:www\.)?hooktube\.com/|
378 (?:www\.)?yourepeat\.com/|
379 tube\.majestyc\.net/|
380 # Invidious instances taken from https://github.com/omarroth/invidious/wiki/Invidious-Instances
381 (?:(?:www|dev)\.)?invidio\.us/|
382 (?:(?:www|no)\.)?invidiou\.sh/|
383 (?:(?:www|fi|de)\.)?invidious\.snopyta\.org/|
384 (?:www\.)?invidious\.kabi\.tk/|
385 (?:www\.)?invidious\.13ad\.de/|
386 (?:www\.)?invidious\.mastodon\.host/|
387 (?:www\.)?invidious\.nixnet\.xyz/|
388 (?:www\.)?invidious\.drycat\.fr/|
389 (?:www\.)?tube\.poal\.co/|
390 (?:www\.)?vid\.wxzm\.sx/|
391 (?:www\.)?yewtu\.be/|
392 (?:www\.)?yt\.elukerio\.org/|
393 (?:www\.)?yt\.lelux\.fi/|
394 (?:www\.)?kgg2m7yk5aybusll\.onion/|
395 (?:www\.)?qklhadlycap4cnod\.onion/|
396 (?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion/|
397 (?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion/|
398 (?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion/|
399 (?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion/|
400 (?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p/|
401 youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
402 (?:.*?\#/)? # handle anchor (#/) redirect urls
403 (?: # the various things that can precede the ID:
404 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
405 |(?: # or the v= param in all its forms
406 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
407 (?:\?|\#!?) # the params delimiter ? or # or #!
408 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&v=V36LpHqtcDY)
413 youtu\.be| # just youtu.be/xxxx
414 vid\.plus| # or vid.plus/xxxx
415 zwearz\.com/watch| # or zwearz.com/watch/xxxx
417 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
419 )? # all until now is optional -> you can pass the naked ID
420 ([0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
423 %(playlist_id)s| # combined list/video URLs are handled by the playlist IE
424 WL # WL are handled by the watch later IE
427 (?(1).+)? # if we found the ID, everything can follow
428 $""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
429 _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
431 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?/base\.(?P<ext>[a-z]+)$',
432 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.(?P<ext>[a-z]+)$',
435 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
436 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
437 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
438 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
439 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
440 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
441 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
442 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
443 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
444 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
445 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
446 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
447 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
448 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
449 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
450 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
451 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
452 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
456 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
457 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
458 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
459 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
460 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
461 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
462 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
464 # Apple HTTP Live Streaming
465 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
466 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
467 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
468 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
469 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
470 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
471 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
472 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
475 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
476 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
477 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
478 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
479 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
480 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
481 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
482 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
483 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
484 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
485 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
486 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
489 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
490 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
491 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
492 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
493 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
494 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
495 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
498 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
499 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
500 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
501 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
502 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
503 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
504 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
505 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
506 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
507 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
508 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
509 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
510 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
511 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
512 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
513 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
514 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
515 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
516 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
517 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
518 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
519 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
522 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
523 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
525 # Dash webm audio with opus inside
526 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
527 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
528 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
531 '_rtmp': {'protocol': 'rtmp'},
533 # av01 video only formats sometimes served with "unknown" codecs
534 '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
535 '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
536 '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
537 '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
539 _SUBTITLE_FORMATS = ('srv1', 'srv2', 'srv3', 'ttml', 'vtt')
546 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
550 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
551 'uploader': 'Philipp Hagemeister',
552 'uploader_id': 'phihag',
553 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
554 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
555 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
556 'upload_date': '20121002',
557 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
558 'categories': ['Science & Technology'],
559 'tags': ['youtube-dl'],
563 'dislike_count': int,
569 'url': 'https://www.youtube.com/watch?v=UxxajLWwzqY',
570 'note': 'Test generic use_cipher_signature video (#897)',
574 'upload_date': '20120506',
575 'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
576 'alt_title': 'I Love It (feat. Charli XCX)',
577 'description': 'md5:19a2f98d9032b9311e686ed039564f63',
578 'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
579 'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
580 'iconic ep', 'iconic', 'love', 'it'],
582 'uploader': 'Icona Pop',
583 'uploader_id': 'IconaPop',
584 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IconaPop',
585 'creator': 'Icona Pop',
586 'track': 'I Love It (feat. Charli XCX)',
587 'artist': 'Icona Pop',
591 'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ',
592 'note': 'Test VEVO video with age protection (#956)',
596 'upload_date': '20130703',
597 'title': 'Justin Timberlake - Tunnel Vision (Official Music Video) (Explicit)',
598 'alt_title': 'Tunnel Vision',
599 'description': 'md5:07dab3356cde4199048e4c7cd93471e1',
601 'uploader': 'justintimberlakeVEVO',
602 'uploader_id': 'justintimberlakeVEVO',
603 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/justintimberlakeVEVO',
604 'creator': 'Justin Timberlake',
605 'track': 'Tunnel Vision',
606 'artist': 'Justin Timberlake',
611 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
612 'note': 'Embed-only video (#1746)',
616 'upload_date': '20120608',
617 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
618 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
619 'uploader': 'SET India',
620 'uploader_id': 'setindia',
621 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
626 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=UxxajLWwzqY',
627 'note': 'Use the first video ID in the URL',
631 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
632 'uploader': 'Philipp Hagemeister',
633 'uploader_id': 'phihag',
634 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
635 'upload_date': '20121002',
636 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
637 'categories': ['Science & Technology'],
638 'tags': ['youtube-dl'],
642 'dislike_count': int,
645 'skip_download': True,
649 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
650 'note': '256k DASH audio (format 141) via DASH manifest',
654 'upload_date': '20121002',
655 'uploader_id': '8KVIDEO',
656 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
658 'uploader': '8KVIDEO',
659 'title': 'UHDTV TEST 8K VIDEO.mp4'
662 'youtube_include_dash_manifest': True,
665 'skip': 'format 141 not served anymore',
667 # DASH manifest with encrypted signature
669 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
673 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
674 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
676 'uploader': 'AfrojackVEVO',
677 'uploader_id': 'AfrojackVEVO',
678 'upload_date': '20131011',
681 'youtube_include_dash_manifest': True,
682 'format': '141/bestaudio[ext=m4a]',
685 # JS player signature function name containing $
687 'url': 'https://www.youtube.com/watch?v=nfWlot6h_JM',
691 'title': 'Taylor Swift - Shake It Off',
692 'description': 'md5:307195cd21ff7fa352270fe884570ef0',
694 'uploader': 'TaylorSwiftVEVO',
695 'uploader_id': 'TaylorSwiftVEVO',
696 'upload_date': '20140818',
699 'youtube_include_dash_manifest': True,
700 'format': '141/bestaudio[ext=m4a]',
705 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
710 'upload_date': '20100909',
711 'uploader': 'Amazing Atheist',
712 'uploader_id': 'TheAmazingAtheist',
713 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
714 'title': 'Burning Everyone\'s Koran',
715 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
718 # Normal age-gate video (No vevo, embed allowed)
720 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
724 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
725 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
727 'uploader': 'The Witcher',
728 'uploader_id': 'WitcherGame',
729 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
730 'upload_date': '20140605',
734 # Age-gate video with encrypted signature
736 'url': 'https://www.youtube.com/watch?v=6kLq3WMV1nU',
740 'title': 'Dedication To My Ex (Miss That) (Lyric Video)',
741 'description': 'md5:33765bb339e1b47e7e72b5490139bb41',
743 'uploader': 'LloydVEVO',
744 'uploader_id': 'LloydVEVO',
745 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/LloydVEVO',
746 'upload_date': '20110629',
750 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
751 # YouTube Red ad is not captured for creator
753 'url': '__2ABJjxzNo',
758 'upload_date': '20100430',
759 'uploader_id': 'deadmau5',
760 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
761 'creator': 'Dada Life, deadmau5',
762 'description': 'md5:12c56784b8032162bb936a5f76d55360',
763 'uploader': 'deadmau5',
764 'title': 'Deadmau5 - Some Chords (HD)',
765 'alt_title': 'This Machine Kills Some Chords',
767 'expected_warnings': [
768 'DASH manifest missing',
771 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
773 'url': 'lqQg6PlCWgI',
778 'upload_date': '20150827',
779 'uploader_id': 'olympic',
780 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
781 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
782 'uploader': 'Olympic',
783 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
786 'skip_download': 'requires avconv',
791 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
795 'stretched_ratio': 16 / 9.,
797 'upload_date': '20110310',
798 'uploader_id': 'AllenMeow',
799 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
800 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
802 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
805 # url_encoded_fmt_stream_map is empty string
807 'url': 'qEJwOuvDf7I',
811 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
813 'upload_date': '20150404',
814 'uploader_id': 'spbelect',
815 'uploader': 'Наблюдатели Петербурга',
818 'skip_download': 'requires avconv',
820 'skip': 'This live event has ended.',
822 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
824 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
828 'title': 'md5:7b81415841e02ecd4313668cde88737a',
829 'description': 'md5:116377fd2963b81ec4ce64b542173306',
831 'upload_date': '20150625',
832 'uploader_id': 'dorappi2000',
833 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
834 'uploader': 'dorappi2000',
835 'formats': 'mincount:31',
837 'skip': 'not actual anymore',
839 # DASH manifest with segment_list
841 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
842 'md5': '8ce563a1d667b599d21064e982ab9e31',
846 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
847 'uploader': 'Airtek',
848 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
849 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
850 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
853 'youtube_include_dash_manifest': True,
854 'format': '135', # bestvideo
856 'skip': 'This live event has ended.',
859 # Multifeed videos (multiple cameras), URL is for Main Camera
860 'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
863 'title': 'teamPGP: Rocket League Noob Stream',
864 'description': 'md5:dc7872fb300e143831327f1bae3af010',
870 'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
871 'description': 'md5:dc7872fb300e143831327f1bae3af010',
873 'upload_date': '20150721',
874 'uploader': 'Beer Games Beer',
875 'uploader_id': 'beergamesbeer',
876 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
877 'license': 'Standard YouTube License',
883 'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
884 'description': 'md5:dc7872fb300e143831327f1bae3af010',
886 'upload_date': '20150721',
887 'uploader': 'Beer Games Beer',
888 'uploader_id': 'beergamesbeer',
889 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
890 'license': 'Standard YouTube License',
896 'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
897 'description': 'md5:dc7872fb300e143831327f1bae3af010',
899 'upload_date': '20150721',
900 'uploader': 'Beer Games Beer',
901 'uploader_id': 'beergamesbeer',
902 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
903 'license': 'Standard YouTube License',
909 'title': 'teamPGP: Rocket League Noob Stream (zim)',
910 'description': 'md5:dc7872fb300e143831327f1bae3af010',
912 'upload_date': '20150721',
913 'uploader': 'Beer Games Beer',
914 'uploader_id': 'beergamesbeer',
915 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
916 'license': 'Standard YouTube License',
920 'skip_download': True,
922 'skip': 'This video is not available.',
925 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
926 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
929 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
932 'skip': 'Not multifeed anymore',
935 'url': 'https://vid.plus/FlRa-iH7PGw',
936 'only_matching': True,
939 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
940 'only_matching': True,
943 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
944 # Also tests cut-off URL expansion in video description (see
945 # https://github.com/ytdl-org/youtube-dl/issues/1892,
946 # https://github.com/ytdl-org/youtube-dl/issues/8164)
947 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
951 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
952 'alt_title': 'Dark Walk - Position Music',
953 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
955 'upload_date': '20151119',
956 'uploader_id': 'IronSoulElf',
957 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
958 'uploader': 'IronSoulElf',
959 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
960 'track': 'Dark Walk - Position Music',
961 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
962 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
965 'skip_download': True,
969 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
970 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
971 'only_matching': True,
974 # Video with yt:stretch=17:0
975 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
979 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
980 'description': 'md5:ee18a25c350637c8faff806845bddee9',
981 'upload_date': '20151107',
982 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
983 'uploader': 'CH GAMER DROID',
986 'skip_download': True,
988 'skip': 'This video does not exist.',
991 # Video licensed under Creative Commons
992 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
996 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
997 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
999 'upload_date': '20150127',
1000 'uploader_id': 'BerkmanCenter',
1001 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
1002 'uploader': 'The Berkman Klein Center for Internet & Society',
1003 'license': 'Creative Commons Attribution license (reuse allowed)',
1006 'skip_download': True,
1010 # Channel-like uploader_url
1011 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1013 'id': 'eQcmzGIKrzg',
1015 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
1016 'description': 'md5:dda0d780d5a6e120758d1711d062a867',
1018 'upload_date': '20151119',
1019 'uploader': 'Bernie Sanders',
1020 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1021 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
1022 'license': 'Creative Commons Attribution license (reuse allowed)',
1025 'skip_download': True,
1029 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;v=V36LpHqtcDY',
1030 'only_matching': True,
1033 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
1034 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1035 'only_matching': True,
1038 # Rental video preview
1039 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1041 'id': 'uGpuVWrhIzE',
1043 'title': 'Piku - Trailer',
1044 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1045 'upload_date': '20150811',
1046 'uploader': 'FlixMatrix',
1047 'uploader_id': 'FlixMatrixKaravan',
1048 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
1049 'license': 'Standard YouTube License',
1052 'skip_download': True,
1054 'skip': 'This video is not available.',
1057 # YouTube Red video with episode data
1058 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1060 'id': 'iqKdEhx-dD4',
1062 'title': 'Isolation - Mind Field (Ep 1)',
1063 'description': 'md5:46a29be4ceffa65b92d277b93f463c0f',
1065 'upload_date': '20170118',
1066 'uploader': 'Vsauce',
1067 'uploader_id': 'Vsauce',
1068 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
1069 'series': 'Mind Field',
1071 'episode_number': 1,
1074 'skip_download': True,
1076 'expected_warnings': [
1077 'Skipping DASH manifest',
1081 # The following content has been identified by the YouTube community
1082 # as inappropriate or offensive to some audiences.
1083 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1085 'id': '6SJNVb0GnPI',
1087 'title': 'Race Differences in Intelligence',
1088 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1090 'upload_date': '20140124',
1091 'uploader': 'New Century Foundation',
1092 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1093 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1096 'skip_download': True,
1101 'url': '1t24XAntNCY',
1102 'only_matching': True,
1105 # geo restricted to JP
1106 'url': 'sJL6WA-aGkQ',
1107 'only_matching': True,
1110 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
1111 'only_matching': True,
1114 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1115 'only_matching': True,
1119 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1120 'only_matching': True,
1123 # Video with unsupported adaptive stream type formats
1124 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1126 'id': 'Z4Vy8R84T1U',
1128 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1129 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1131 'upload_date': '20130923',
1132 'uploader': 'Amelia Putri Harwita',
1133 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1134 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1135 'formats': 'maxcount:10',
1138 'skip_download': True,
1139 'youtube_include_dash_manifest': False,
1141 'skip': 'not actual anymore',
1144 # Youtube Music Auto-generated description
1145 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1147 'id': 'MgNrAu2pzNs',
1149 'title': 'Voyeur Girl',
1150 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1151 'upload_date': '20190312',
1152 'uploader': 'Stephen - Topic',
1153 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1154 'artist': 'Stephen',
1155 'track': 'Voyeur Girl',
1156 'album': 'it\'s too much love to know my dear',
1157 'release_date': '20190313',
1158 'release_year': 2019,
1161 'skip_download': True,
1165 # Youtube Music Auto-generated description
1166 # Retrieve 'artist' field from 'Artist:' in video description
1167 # when it is present on youtube music video
1168 'url': 'https://www.youtube.com/watch?v=k0jLE7tTwjY',
1170 'id': 'k0jLE7tTwjY',
1172 'title': 'Latch Feat. Sam Smith',
1173 'description': 'md5:3cb1e8101a7c85fcba9b4fb41b951335',
1174 'upload_date': '20150110',
1175 'uploader': 'Various Artists - Topic',
1176 'uploader_id': 'UCNkEcmYdjrH4RqtNgh7BZ9w',
1177 'artist': 'Disclosure',
1178 'track': 'Latch Feat. Sam Smith',
1179 'album': 'Latch Featuring Sam Smith',
1180 'release_date': '20121008',
1181 'release_year': 2012,
1184 'skip_download': True,
1188 # Youtube Music Auto-generated description
1189 # handle multiple artists on youtube music video
1190 'url': 'https://www.youtube.com/watch?v=74qn0eJSjpA',
1192 'id': '74qn0eJSjpA',
1194 'title': 'Eastside',
1195 'description': 'md5:290516bb73dcbfab0dcc4efe6c3de5f2',
1196 'upload_date': '20180710',
1197 'uploader': 'Benny Blanco - Topic',
1198 'uploader_id': 'UCzqz_ksRu_WkIzmivMdIS7A',
1199 'artist': 'benny blanco, Halsey, Khalid',
1200 'track': 'Eastside',
1201 'album': 'Eastside',
1202 'release_date': '20180713',
1203 'release_year': 2018,
1206 'skip_download': True,
1210 # Youtube Music Auto-generated description
1211 # handle youtube music video with release_year and no release_date
1212 'url': 'https://www.youtube.com/watch?v=-hcAI0g-f5M',
1214 'id': '-hcAI0g-f5M',
1216 'title': 'Put It On Me',
1217 'description': 'md5:f6422397c07c4c907c6638e1fee380a5',
1218 'upload_date': '20180426',
1219 'uploader': 'Matt Maeson - Topic',
1220 'uploader_id': 'UCnEkIGqtGcQMLk73Kp-Q5LQ',
1221 'artist': 'Matt Maeson',
1222 'track': 'Put It On Me',
1223 'album': 'The Hearse',
1224 'release_date': None,
1225 'release_year': 2018,
1228 'skip_download': True,
1232 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1233 'only_matching': True,
1236 # invalid -> valid video id redirection
1237 'url': 'DJztXj2GPfl',
1239 'id': 'DJztXj2GPfk',
1241 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1242 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1243 'upload_date': '20090125',
1244 'uploader': 'Prochorowka',
1245 'uploader_id': 'Prochorowka',
1246 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1247 'artist': 'Panjabi MC',
1248 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1249 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1252 'skip_download': True,
def __init__(self, *args, **kwargs):
    """Initialise the extractor and start with an empty player cache."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # Filled by _decrypt_signature: maps (player_url, signature shape) to
    # the deciphering callable, so each player is only analysed once.
    self._player_cache = dict()
def report_video_info_webpage_download(self, video_id):
    """Announce on screen that the video info webpage is being downloaded."""
    message = '%s: Downloading video info webpage' % video_id
    self.to_screen(message)
def report_information_extraction(self, video_id):
    """Announce on screen that video information is being extracted."""
    message = '%s: Extracting video information' % video_id
    self.to_screen(message)
def report_unavailable_format(self, video_id, format):
    """Announce on screen that *format* is not available for *video_id*."""
    details = (video_id, format)
    self.to_screen('%s: Format %s not available' % details)
def report_rtmp_download(self):
    """Announce on screen that the download goes through the RTMP protocol."""
    message = 'RTMP download detected'
    self.to_screen(message)
1277 def _signature_cache_id(self, example_sig):
1278 """ Return a string representation of a signature """
1279 return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
1282 def _extract_player_info(cls, player_url):
1283 for player_re in cls._PLAYER_INFO_RE:
1284 id_m = re.search(player_re, player_url)
1288 raise ExtractorError('Cannot identify player %r' % player_url)
1289 return id_m.group('ext'), id_m.group('id')
1291 def _extract_signature_function(self, video_id, player_url, example_sig):
1292 player_type, player_id = self._extract_player_info(player_url)
1294 # Read from filesystem cache
1295 func_id = '%s_%s_%s' % (
1296 player_type, player_id, self._signature_cache_id(example_sig))
1297 assert os.path.basename(func_id) == func_id
1299 cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
1300 if cache_spec is not None:
1301 return lambda s: ''.join(s[i] for i in cache_spec)
1304 'Downloading player %s' % player_url
1305 if self._downloader.params.get('verbose') else
1306 'Downloading %s player %s' % (player_type, player_id)
1308 if player_type == 'js':
1309 code = self._download_webpage(
1310 player_url, video_id,
1312 errnote='Download of %s failed' % player_url)
1313 res = self._parse_sig_js(code)
1314 elif player_type == 'swf':
1315 urlh = self._request_webpage(
1316 player_url, video_id,
1318 errnote='Download of %s failed' % player_url)
1320 res = self._parse_sig_swf(code)
1322 assert False, 'Invalid player type %r' % player_type
1324 test_string = ''.join(map(compat_chr, range(len(example_sig))))
1325 cache_res = res(test_string)
1326 cache_spec = [ord(c) for c in cache_res]
1328 self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
def _print_sig_code(self, func, example_sig):
    """Print Python code equivalent to the signature function *func*.

    *func* is run on a string of distinct characters as long as
    *example_sig*; the resulting permutation is rendered as a sum of
    indexing/slicing expressions on `s` and sent to self.to_screen.
    """
    def gen_sig_code(idxs):
        # Yield 's[...]' expressions that together reproduce idxs; runs of
        # consecutive indices (step +1 or -1) are folded into one slice.
        def _genslice(start, end, step):
            starts = '' if start == 0 else str(start)
            ends = (':%d' % (end + step)) if end + step >= 0 else ':'
            steps = '' if step == 1 else (':%d' % step)
            return 's[%s%s%s]' % (starts, ends, steps)

        if not idxs:
            return
        # With a single index the pairwise loop below never runs, so seed
        # `i` here; otherwise the trailing yield would hit an unbound name.
        i = idxs[0]
        step = None
        # Quelch pyflakes warnings - start will be set when step is set
        start = '(Never used)'
        for i, prev in zip(idxs[1:], idxs[:-1]):
            if step is not None:
                if i - prev == step:
                    continue
                yield _genslice(start, prev, step)
                step = None
                continue
            if i - prev in [-1, 1]:
                step = i - prev
                start = prev
                continue
            else:
                yield 's[%d]' % prev
        if step is None:
            yield 's[%d]' % i
        else:
            yield _genslice(start, i, step)

    test_string = ''.join(map(compat_chr, range(len(example_sig))))
    cache_res = func(test_string)
    cache_spec = [ord(c) for c in cache_res]
    expr_code = ' + '.join(gen_sig_code(cache_spec))
    signature_id_tuple = '(%s)' % (
        ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            ' return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)
def _parse_sig_js(self, jscode):
    """Return a callable deciphering signatures with the JS player's code.

    The name of the player's signature function is located in *jscode* with
    the patterns below (tried in order), the function is extracted through
    JSInterpreter, and a wrapper taking the signature string is returned.
    """
    # The original tuple listed its last pattern twice; the duplicate could
    # never match where the first copy had not, so it is dropped.
    funcname_patterns = (
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
    )
    funcname = self._search_regex(
        funcname_patterns,
        jscode, 'Initial JS player signature function name', group='sig')

    jsi = JSInterpreter(jscode)
    initial_function = jsi.extract_function(funcname)
    # The extracted function takes its arguments as a list
    return lambda s: initial_function([s])
def _parse_sig_swf(self, file_contents):
    """Return a callable deciphering signatures with the SWF player's code.

    The 'decipher' function of the 'SignatureDecipher' class is extracted
    from *file_contents* through SWFInterpreter and wrapped so that it
    takes the signature string directly.
    """
    interpreter = SWFInterpreter(file_contents)
    TARGET_CLASSNAME = 'SignatureDecipher'
    decipher = interpreter.extract_function(
        interpreter.extract_class(TARGET_CLASSNAME), 'decipher')

    def decrypt(s):
        # The extracted function takes its arguments as a list
        return decipher([s])

    return decrypt
1398 def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
1399 """Turn the encrypted s field into a working signature"""
1401 if player_url is None:
1402 raise ExtractorError('Cannot decrypt signature without player_url')
1404 if player_url.startswith('//'):
1405 player_url = 'https:' + player_url
1406 elif not re.match(r'https?://', player_url):
1407 player_url = compat_urlparse.urljoin(
1408 'https://www.youtube.com', player_url)
1410 player_id = (player_url, self._signature_cache_id(s))
1411 if player_id not in self._player_cache:
1412 func = self._extract_signature_function(
1413 video_id, player_url, s
1415 self._player_cache[player_id] = func
1416 func = self._player_cache[player_id]
1417 if self._downloader.params.get('youtube_print_sig_code'):
1418 self._print_sig_code(func, s)
1420 except Exception as e:
1421 tb = traceback.format_exc()
1422 raise ExtractorError(
1423 'Signature extraction failed: ' + tb, cause=e)
def _get_subtitles(self, video_id, webpage):
    """Return the manually provided subtitles of *video_id*.

    The track list is fetched from the timedtext 'list' endpoint. The
    result maps each language code to a list of {'url', 'ext'} dicts, one
    per entry of self._SUBTITLE_FORMATS. An empty dict is returned (after a
    warning) when the list cannot be downloaded or contains no track.

    *webpage* is not used here.
    """
    try:
        subs_doc = self._download_xml(
            'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
            video_id, note=False)
    except ExtractorError as err:
        self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err))
        return {}

    sub_lang_list = {}
    for track in subs_doc.findall('track'):
        lang = track.attrib['lang_code']
        # Only the first track of each language is kept
        if lang in sub_lang_list:
            continue
        sub_formats = []
        for ext in self._SUBTITLE_FORMATS:
            params = compat_urllib_parse_urlencode({
                'lang': lang,
                'v': video_id,
                'fmt': ext,
                'name': track.attrib['name'].encode('utf-8'),
            })
            sub_formats.append({
                'url': 'https://www.youtube.com/api/timedtext?' + params,
                'ext': ext,
            })
        sub_lang_list[lang] = sub_formats
    if not sub_lang_list:
        self._downloader.report_warning('video doesn\'t have subtitles')
        return {}
    return sub_lang_list
1457 def _get_ytplayer_config(self, video_id, webpage):
1459 # User data may contain arbitrary character sequences that may affect
1460 # JSON extraction with regex, e.g. when '};' is contained the second
1461 # regex won't capture the whole JSON. Yet working around by trying more
1462 # concrete regex first keeping in mind proper quoted string handling
1463 # to be implemented in future that will replace this workaround (see
1464 # https://github.com/ytdl-org/youtube-dl/issues/7468,
1465 # https://github.com/ytdl-org/youtube-dl/pull/7599)
1466 r';ytplayer\.config\s*=\s*({.+?});ytplayer',
1467 r';ytplayer\.config\s*=\s*({.+?});',
1469 config = self._search_regex(
1470 patterns, webpage, 'ytplayer.config', default=None)
1472 return self._parse_json(
1473 uppercase_escape(config), video_id, fatal=False)
1475 def _get_automatic_captions(self, video_id, webpage):
1476 """We need the webpage for getting the captions url, pass it as an
1477 argument to speed up the process."""
1478 self.to_screen('%s: Looking for automatic captions' % video_id)
1479 player_config = self._get_ytplayer_config(video_id, webpage)
1480 err_msg = 'Couldn\'t find automatic captions for %s' % video_id
1481 if not player_config:
1482 self._downloader.report_warning(err_msg)
1485 args = player_config['args']
1486 caption_url = args.get('ttsurl')
1488 timestamp = args['timestamp']
1489 # We get the available subtitles
1490 list_params = compat_urllib_parse_urlencode({
1495 list_url = caption_url + '&' + list_params
1496 caption_list = self._download_xml(list_url, video_id)
1497 original_lang_node = caption_list.find('track')
1498 if original_lang_node is None:
1499 self._downloader.report_warning('Video doesn\'t have automatic captions')
1501 original_lang = original_lang_node.attrib['lang_code']
1502 caption_kind = original_lang_node.attrib.get('kind', '')
1505 for lang_node in caption_list.findall('target'):
1506 sub_lang = lang_node.attrib['lang_code']
1508 for ext in self._SUBTITLE_FORMATS:
1509 params = compat_urllib_parse_urlencode({
1510 'lang': original_lang,
1514 'kind': caption_kind,
1516 sub_formats.append({
1517 'url': caption_url + '&' + params,
1520 sub_lang_list[sub_lang] = sub_formats
1521 return sub_lang_list
1523 def make_captions(sub_url, sub_langs):
1524 parsed_sub_url = compat_urllib_parse_urlparse(sub_url)
1525 caption_qs = compat_parse_qs(parsed_sub_url.query)
1527 for sub_lang in sub_langs:
1529 for ext in self._SUBTITLE_FORMATS:
1531 'tlang': [sub_lang],
1534 sub_url = compat_urlparse.urlunparse(parsed_sub_url._replace(
1535 query=compat_urllib_parse_urlencode(caption_qs, True)))
1536 sub_formats.append({
1540 captions[sub_lang] = sub_formats
1543 # New captions format as of 22.06.2017
1544 player_response = args.get('player_response')
1545 if player_response and isinstance(player_response, compat_str):
1546 player_response = self._parse_json(
1547 player_response, video_id, fatal=False)
1549 renderer = player_response['captions']['playerCaptionsTracklistRenderer']
1550 base_url = renderer['captionTracks'][0]['baseUrl']
1552 for lang in renderer['translationLanguages']:
1553 lang_code = lang.get('languageCode')
1555 sub_lang_list.append(lang_code)
1556 return make_captions(base_url, sub_lang_list)
1558 # Some videos don't provide ttsurl but rather caption_tracks and
1559 # caption_translation_languages (e.g. 20LmZk1hakA)
1560 # Does not used anymore as of 22.06.2017
1561 caption_tracks = args['caption_tracks']
1562 caption_translation_languages = args['caption_translation_languages']
1563 caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
1565 for lang in caption_translation_languages.split(','):
1566 lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang))
1567 sub_lang = lang_qs.get('lc', [None])[0]
1569 sub_lang_list.append(sub_lang)
1570 return make_captions(caption_url, sub_lang_list)
1571 # An extractor error can be raise by the download process if there are
1572 # no automatic captions but there are subtitles
1573 except (KeyError, IndexError, ExtractorError):
1574 self._downloader.report_warning(err_msg)
def _mark_watched(self, video_id, video_info, player_response):
    """Request the playback tracking URL so the video counts as watched.

    The URL is taken from *player_response* (playbackTracking) or, failing
    that, from *video_info* ('videostats_playback_base_url'). Nothing is
    done when neither provides a valid URL. The request is non-fatal.
    """
    playback_url = url_or_none(try_get(
        player_response,
        lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']) or try_get(
        video_info, lambda x: x['videostats_playback_base_url'][0]))
    if not playback_url:
        return
    parsed_playback_url = compat_urlparse.urlparse(playback_url)
    qs = compat_urlparse.parse_qs(parsed_playback_url.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    # random.choice picks uniformly; the former `randint(0, 256) & 63` had
    # an inclusive upper bound and therefore slightly favoured index 0.
    cpn = ''.join(random.choice(CPN_ALPHABET) for _ in range(16))

    qs.update({
        'ver': ['2'],
        'cpn': [cpn],
    })
    playback_url = compat_urlparse.urlunparse(
        parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))

    self._download_webpage(
        playback_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)
def _extract_urls(webpage):
    """Return every YouTube URL or video id embedded in *webpage*.

    Three kinds of embeds are recognised, in this order: regular player
    embeds (HTML-unescaped URLs), lazyYT embeds, and the Wordpress
    "YouTube Video Importer" plugin (both yield the raw attribute value).
    """
    # Embedded YouTube player
    entries = [
        unescapeHTML(mobj.group('url'))
        for mobj in re.finditer(r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF\(?:\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
            (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
            (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1''', webpage)]

    # lazyYT YouTube embed
    entries.extend(list(map(
        unescapeHTML,
        re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))

    # Wordpress "YouTube Video Importer" plugin
    matches = re.findall(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
    # findall returns one tuple per match; the id is the last group
    entries.extend(m[-1] for m in matches)

    return entries
def _extract_url(webpage):
    """Return the first entry found by YoutubeIE._extract_urls, or None."""
    found = YoutubeIE._extract_urls(webpage)
    if not found:
        return None
    return found[0]
def extract_id(cls, url):
    """Return the video id contained in *url*.

    *url* is matched against cls._VALID_URL (a verbose-mode pattern); the
    id is the pattern's second capture group.

    Raises ExtractorError when *url* does not match.
    """
    mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
    if mobj is None:
        raise ExtractorError('Invalid URL: %s' % url)
    video_id = mobj.group(2)
    return video_id
1649 def _extract_chapters(description, duration):
1652 chapter_lines = re.findall(
1653 r'(?:^|<br\s*/>)([^<]*<a[^>]+onclick=["\']yt\.www\.watch\.player\.seekTo[^>]+>(\d{1,2}:\d{1,2}(?::\d{1,2})?)</a>[^>]*)(?=$|<br\s*/>)',
1655 if not chapter_lines:
1658 for next_num, (chapter_line, time_point) in enumerate(
1659 chapter_lines, start=1):
1660 start_time = parse_duration(time_point)
1661 if start_time is None:
1663 if start_time > duration:
1665 end_time = (duration if next_num == len(chapter_lines)
1666 else parse_duration(chapter_lines[next_num][1]))
1667 if end_time is None:
1669 if end_time > duration:
1671 if start_time > end_time:
1673 chapter_title = re.sub(
1674 r'<a[^>]+>[^<]+</a>', '', chapter_line).strip(' \t-')
1675 chapter_title = re.sub(r'\s+', ' ', chapter_title)
1677 'start_time': start_time,
1678 'end_time': end_time,
1679 'title': chapter_title,
1683 def _real_extract(self, url):
1684 url, smuggled_data = unsmuggle_url(url, {})
1687 'http' if self._downloader.params.get('prefer_insecure', False)
1692 parsed_url = compat_urllib_parse_urlparse(url)
1693 for component in [parsed_url.fragment, parsed_url.query]:
1694 query = compat_parse_qs(component)
1695 if start_time is None and 't' in query:
1696 start_time = parse_duration(query['t'][0])
1697 if start_time is None and 'start' in query:
1698 start_time = parse_duration(query['start'][0])
1699 if end_time is None and 'end' in query:
1700 end_time = parse_duration(query['end'][0])
1702 # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1703 mobj = re.search(self._NEXT_URL_RE, url)
1705 url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
1706 video_id = self.extract_id(url)
1709 url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
1710 video_webpage, urlh = self._download_webpage_handle(url, video_id)
1712 qs = compat_parse_qs(compat_urllib_parse_urlparse(urlh.geturl()).query)
1713 video_id = qs.get('v', [None])[0] or video_id
1715 # Attempt to extract SWF player URL
1716 mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
1717 if mobj is not None:
1718 player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1724 def add_dash_mpd(video_info):
1725 dash_mpd = video_info.get('dashmpd')
1726 if dash_mpd and dash_mpd[0] not in dash_mpds:
1727 dash_mpds.append(dash_mpd[0])
1729 def add_dash_mpd_pr(pl_response):
1730 dash_mpd = url_or_none(try_get(
1731 pl_response, lambda x: x['streamingData']['dashManifestUrl'],
1733 if dash_mpd and dash_mpd not in dash_mpds:
1734 dash_mpds.append(dash_mpd)
1739 def extract_view_count(v_info):
1740 return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
1742 def extract_player_response(player_response, video_id):
1743 pl_response = str_or_none(player_response)
1746 pl_response = self._parse_json(pl_response, video_id, fatal=False)
1747 if isinstance(pl_response, dict):
1748 add_dash_mpd_pr(pl_response)
1751 player_response = {}
1755 embed_webpage = None
1756 if re.search(r'player-age-gate-content">', video_webpage) is not None:
1758 # We simulate the access to the video from www.youtube.com/v/{video_id};
1759 # this can be viewed without logging in to YouTube
1760 url = proto + '://www.youtube.com/embed/%s' % video_id
1761 embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
1762 data = compat_urllib_parse_urlencode({
1763 'video_id': video_id,
1764 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1765 'sts': self._search_regex(
1766 r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
1768 video_info_url = proto + '://www.youtube.com/get_video_info?' + data
1770 video_info_webpage = self._download_webpage(
1771 video_info_url, video_id,
1772 note='Refetching age-gated info webpage',
1773 errnote='unable to download video info webpage')
1774 except ExtractorError:
1775 video_info_webpage = None
1776 if video_info_webpage:
1777 video_info = compat_parse_qs(video_info_webpage)
1778 pl_response = video_info.get('player_response', [None])[0]
1779 player_response = extract_player_response(pl_response, video_id)
1780 add_dash_mpd(video_info)
1781 view_count = extract_view_count(video_info)
1784 # Try looking directly into the video webpage
1785 ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
1787 args = ytplayer_config['args']
1788 if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
1789 # Convert to the same format returned by compat_parse_qs
1790 video_info = dict((k, [v]) for k, v in args.items())
1791 add_dash_mpd(video_info)
1792 # Rental video is not rented but preview is available (e.g.
1793 # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
1794 # https://github.com/ytdl-org/youtube-dl/issues/10532)
1795 if not video_info and args.get('ypc_vid'):
1796 return self.url_result(
1797 args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
1798 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1800 if not player_response:
1801 player_response = extract_player_response(args.get('player_response'), video_id)
1802 if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
1803 add_dash_mpd_pr(player_response)
1805 def extract_unavailable_message():
1807 for tag, kind in (('h1', 'message'), ('div', 'submessage')):
1808 msg = self._html_search_regex(
1809 r'(?s)<{tag}[^>]+id=["\']unavailable-{kind}["\'][^>]*>(.+?)</{tag}>'.format(tag=tag, kind=kind),
1810 video_webpage, 'unavailable %s' % kind, default=None)
1812 messages.append(msg)
1814 return '\n'.join(messages)
1816 if not video_info and not player_response:
1817 unavailable_message = extract_unavailable_message()
1818 if not unavailable_message:
1819 unavailable_message = 'Unable to extract video data'
1820 raise ExtractorError(
1821 'YouTube said: %s' % unavailable_message, expected=True, video_id=video_id)
1823 if not isinstance(video_info, dict):
1826 video_details = try_get(
1827 player_response, lambda x: x['videoDetails'], dict) or {}
1829 video_title = video_info.get('title', [None])[0] or video_details.get('title')
1831 self._downloader.report_warning('Unable to extract video title')
1834 description_original = video_description = get_element_by_id("eow-description", video_webpage)
1835 if video_description:
1838 redir_url = compat_urlparse.urljoin(url, m.group(1))
1839 parsed_redir_url = compat_urllib_parse_urlparse(redir_url)
1840 if re.search(r'^(?:www\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)$', parsed_redir_url.netloc) and parsed_redir_url.path == '/redirect':
1841 qs = compat_parse_qs(parsed_redir_url.query)
1847 description_original = video_description = re.sub(r'''(?x)
1849 (?:[a-zA-Z-]+="[^"]*"\s+)*?
1850 (?:title|href)="([^"]+)"\s+
1851 (?:[a-zA-Z-]+="[^"]*"\s+)*?
1855 ''', replace_url, video_description)
1856 video_description = clean_html(video_description)
1858 video_description = self._html_search_meta('description', video_webpage) or video_details.get('shortDescription')
1860 if not smuggled_data.get('force_singlefeed', False):
1861 if not self._downloader.params.get('noplaylist'):
1862 multifeed_metadata_list = try_get(
1864 lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
1865 compat_str) or try_get(
1866 video_info, lambda x: x['multifeed_metadata_list'][0], compat_str)
1867 if multifeed_metadata_list:
1870 for feed in multifeed_metadata_list.split(','):
1871 # Unquote should take place before split on comma (,) since textual
1872 # fields may contain comma as well (see
1873 # https://github.com/ytdl-org/youtube-dl/issues/8536)
1874 feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
1876 def feed_entry(name):
1877 return try_get(feed_data, lambda x: x[name][0], compat_str)
1879 feed_id = feed_entry('id')
1882 feed_title = feed_entry('title')
1885 title += ' (%s)' % feed_title
1887 '_type': 'url_transparent',
1888 'ie_key': 'Youtube',
1890 '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
1891 {'force_singlefeed': True}),
1894 feed_ids.append(feed_id)
1896 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
1897 % (', '.join(feed_ids), video_id))
1898 return self.playlist_result(entries, video_id, video_title, video_description)
1900 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
1902 if view_count is None:
1903 view_count = extract_view_count(video_info)
1904 if view_count is None and video_details:
1905 view_count = int_or_none(video_details.get('viewCount'))
1908 is_live = bool_or_none(video_details.get('isLive'))
1910 # Check for "rental" videos
1911 if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
1912 raise ExtractorError('"rental" videos not supported. See https://github.com/ytdl-org/youtube-dl/issues/359 for more information.', expected=True)
1914 def _extract_filesize(media_url):
1915 return int_or_none(self._search_regex(
1916 r'\bclen[=/](\d+)', media_url, 'filesize', default=None))
1918 streaming_formats = try_get(player_response, lambda x: x['streamingData']['formats'], list) or []
1919 streaming_formats.extend(try_get(player_response, lambda x: x['streamingData']['adaptiveFormats'], list) or [])
1921 if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1922 self.report_rtmp_download()
1924 'format_id': '_rtmp',
1926 'url': video_info['conn'][0],
1927 'player_url': player_url,
1929 elif not is_live and (streaming_formats or len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1):
1930 encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
1931 if 'rtmpe%3Dyes' in encoded_url_map:
1932 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/ytdl-org/youtube-dl/issues/343 for more information.', expected=True)
1935 fmt_list = video_info.get('fmt_list', [''])[0]
1937 for fmt in fmt_list.split(','):
1938 spec = fmt.split('/')
1940 width_height = spec[1].split('x')
1941 if len(width_height) == 2:
1942 formats_spec[spec[0]] = {
1943 'resolution': spec[1],
1944 'width': int_or_none(width_height[0]),
1945 'height': int_or_none(width_height[1]),
1947 for fmt in streaming_formats:
1948 itag = str_or_none(fmt.get('itag'))
1951 quality = fmt.get('quality')
1952 quality_label = fmt.get('qualityLabel') or quality
1953 formats_spec[itag] = {
1954 'asr': int_or_none(fmt.get('audioSampleRate')),
1955 'filesize': int_or_none(fmt.get('contentLength')),
1956 'format_note': quality_label,
1957 'fps': int_or_none(fmt.get('fps')),
1958 'height': int_or_none(fmt.get('height')),
1959 # bitrate for itag 43 is always 2147483647
1960 'tbr': float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000) if itag != '43' else None,
1961 'width': int_or_none(fmt.get('width')),
1964 for fmt in streaming_formats:
1965 if fmt.get('drmFamilies') or fmt.get('drm_families'):
1967 url = url_or_none(fmt.get('url'))
1970 cipher = fmt.get('cipher') or fmt.get('signatureCipher')
1973 url_data = compat_parse_qs(cipher)
1974 url = url_or_none(try_get(url_data, lambda x: x['url'][0], compat_str))
1979 url_data = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
1981 stream_type = int_or_none(try_get(url_data, lambda x: x['stream_type'][0]))
1982 # Unsupported FORMAT_STREAM_TYPE_OTF
1983 if stream_type == 3:
1986 format_id = fmt.get('itag') or url_data['itag'][0]
1989 format_id = compat_str(format_id)
1992 if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
1993 ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
1994 jsplayer_url_json = self._search_regex(
1996 embed_webpage if age_gate else video_webpage,
1997 'JS player URL (1)', default=None)
1998 if not jsplayer_url_json and not age_gate:
1999 # We need the embed website after all
2000 if embed_webpage is None:
2001 embed_url = proto + '://www.youtube.com/embed/%s' % video_id
2002 embed_webpage = self._download_webpage(
2003 embed_url, video_id, 'Downloading embed webpage')
2004 jsplayer_url_json = self._search_regex(
2005 ASSETS_RE, embed_webpage, 'JS player URL')
2007 player_url = json.loads(jsplayer_url_json)
2008 if player_url is None:
2009 player_url_json = self._search_regex(
2010 r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
2011 video_webpage, 'age gate player URL')
2012 player_url = json.loads(player_url_json)
2014 if 'sig' in url_data:
2015 url += '&signature=' + url_data['sig'][0]
2016 elif 's' in url_data:
2017 encrypted_sig = url_data['s'][0]
2019 if self._downloader.params.get('verbose'):
2020 if player_url is None:
2021 player_desc = 'unknown'
2023 player_type, player_version = self._extract_player_info(player_url)
2024 player_desc = '%s player %s' % ('flash' if player_type == 'swf' else 'html5', player_version)
2025 parts_sizes = self._signature_cache_id(encrypted_sig)
2026 self.to_screen('{%s} signature length %s, %s' %
2027 (format_id, parts_sizes, player_desc))
2029 signature = self._decrypt_signature(
2030 encrypted_sig, video_id, player_url, age_gate)
2031 sp = try_get(url_data, lambda x: x['sp'][0], compat_str) or 'signature'
2032 url += '&%s=%s' % (sp, signature)
2033 if 'ratebypass' not in url:
2034 url += '&ratebypass=yes'
2037 'format_id': format_id,
2039 'player_url': player_url,
2041 if format_id in self._formats:
2042 dct.update(self._formats[format_id])
2043 if format_id in formats_spec:
2044 dct.update(formats_spec[format_id])
2046 # Some itags are not included in DASH manifest thus corresponding formats will
2047 # lack metadata (see https://github.com/ytdl-org/youtube-dl/pull/5993).
2048 # Trying to extract metadata from url_encoded_fmt_stream_map entry.
2049 mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
2050 width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
2053 width = int_or_none(fmt.get('width'))
2055 height = int_or_none(fmt.get('height'))
2057 filesize = int_or_none(url_data.get(
2058 'clen', [None])[0]) or _extract_filesize(url)
2060 quality = url_data.get('quality', [None])[0] or fmt.get('quality')
2061 quality_label = url_data.get('quality_label', [None])[0] or fmt.get('qualityLabel')
2063 tbr = (float_or_none(url_data.get('bitrate', [None])[0], 1000)
2064 or float_or_none(fmt.get('bitrate'), 1000)) if format_id != '43' else None
2065 fps = int_or_none(url_data.get('fps', [None])[0]) or int_or_none(fmt.get('fps'))
2068 'filesize': filesize,
2073 'format_note': quality_label or quality,
2075 for key, value in more_fields.items():
2078 type_ = url_data.get('type', [None])[0] or fmt.get('mimeType')
2080 type_split = type_.split(';')
2081 kind_ext = type_split[0].split('/')
2082 if len(kind_ext) == 2:
2084 dct['ext'] = mimetype2ext(type_split[0])
2085 if kind in ('audio', 'video'):
2087 for mobj in re.finditer(
2088 r'(?P<key>[a-zA-Z_-]+)=(?P<quote>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):
2089 if mobj.group('key') == 'codecs':
2090 codecs = mobj.group('val')
2093 dct.update(parse_codecs(codecs))
2094 if dct.get('acodec') == 'none' or dct.get('vcodec') == 'none':
2095 dct['downloader_options'] = {
2096 # Youtube throttles chunks >~10M
2097 'http_chunk_size': 10485760,
2102 url_or_none(try_get(
2104 lambda x: x['streamingData']['hlsManifestUrl'],
2106 or url_or_none(try_get(
2107 video_info, lambda x: x['hlsvp'][0], compat_str)))
2110 m3u8_formats = self._extract_m3u8_formats(
2111 manifest_url, video_id, 'mp4', fatal=False)
2112 for a_format in m3u8_formats:
2113 itag = self._search_regex(
2114 r'/itag/(\d+)/', a_format['url'], 'itag', default=None)
2116 a_format['format_id'] = itag
2117 if itag in self._formats:
2118 dct = self._formats[itag].copy()
2119 dct.update(a_format)
2121 a_format['player_url'] = player_url
2122 # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
2123 a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
2124 formats.append(a_format)
2126 error_message = extract_unavailable_message()
2127 if not error_message:
2128 error_message = clean_html(try_get(
2129 player_response, lambda x: x['playabilityStatus']['reason'],
2131 if not error_message:
2132 error_message = clean_html(
2133 try_get(video_info, lambda x: x['reason'][0], compat_str))
2135 raise ExtractorError(error_message, expected=True)
2136 raise ExtractorError('no conn, hlsvp, hlsManifestUrl or url_encoded_fmt_stream_map information found in video info')
2139 video_uploader = try_get(
2140 video_info, lambda x: x['author'][0],
2141 compat_str) or str_or_none(video_details.get('author'))
2143 video_uploader = compat_urllib_parse_unquote_plus(video_uploader)
2145 self._downloader.report_warning('unable to extract uploader name')
2148 video_uploader_id = None
2149 video_uploader_url = None
2151 r'<link itemprop="url" href="(?P<uploader_url>https?://www\.youtube\.com/(?:user|channel)/(?P<uploader_id>[^"]+))">',
2153 if mobj is not None:
2154 video_uploader_id = mobj.group('uploader_id')
2155 video_uploader_url = mobj.group('uploader_url')
2157 self._downloader.report_warning('unable to extract uploader nickname')
2160 str_or_none(video_details.get('channelId'))
2161 or self._html_search_meta(
2162 'channelId', video_webpage, 'channel id', default=None)
2163 or self._search_regex(
2164 r'data-channel-external-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
2165 video_webpage, 'channel id', default=None, group='id'))
2166 channel_url = 'http://www.youtube.com/channel/%s' % channel_id if channel_id else None
2169 # We try first to get a high quality image:
2170 m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
2171 video_webpage, re.DOTALL)
2172 if m_thumb is not None:
2173 video_thumbnail = m_thumb.group(1)
2174 elif 'thumbnail_url' not in video_info:
2175 self._downloader.report_warning('unable to extract video thumbnail')
2176 video_thumbnail = None
2177 else: # don't panic if we can't find it
2178 video_thumbnail = compat_urllib_parse_unquote_plus(video_info['thumbnail_url'][0])
2181 upload_date = self._html_search_meta(
2182 'datePublished', video_webpage, 'upload date', default=None)
2184 upload_date = self._search_regex(
2185 [r'(?s)id="eow-date.*?>(.*?)</span>',
2186 r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'],
2187 video_webpage, 'upload date', default=None)
2188 upload_date = unified_strdate(upload_date)
2190 video_license = self._html_search_regex(
2191 r'<h4[^>]+class="title"[^>]*>\s*License\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li',
2192 video_webpage, 'license', default=None)
2194 m_music = re.search(
2196 <h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*
2204 \bhref=["\']/red[^>]*>| # drop possible
2205 >\s*Listen ad-free with YouTube Red # YouTube Red ad
2212 video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
2213 video_creator = clean_html(m_music.group('creator'))
2215 video_alt_title = video_creator = None
def extract_meta(field):
    """Search the watch page for the metadata row headed ``field``.

    ``field`` is interpolated as-is (not regex-escaped) into the heading
    pattern ``<h4 class="title">FIELD</h4>``; the capture is the content of
    the first ``<li>`` of the ``<ul>`` that follows. The search is made
    with ``default=None``.
    """
    row_re = (
        r'<h4[^>]+class="title"[^>]*>\s*%s\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li>\s*'
        % field)
    return self._html_search_regex(
        row_re, video_webpage, field, default=None)
2222 track = extract_meta('Song')
2223 artist = extract_meta('Artist')
2224 album = extract_meta('Album')
2226 # Youtube Music Auto-generated description
2227 release_date = release_year = None
2228 if video_description:
2229 mobj = re.search(r'(?s)Provided to YouTube by [^\n]+\n+(?P<track>[^·]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?', video_description)
2232 track = mobj.group('track').strip()
2234 artist = mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·'))
2236 album = mobj.group('album'.strip())
2237 release_year = mobj.group('release_year')
2238 release_date = mobj.group('release_date')
2240 release_date = release_date.replace('-', '')
2241 if not release_year:
2242 release_year = int(release_date[:4])
2244 release_year = int(release_year)
2246 m_episode = re.search(
2247 r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
2250 series = unescapeHTML(m_episode.group('series'))
2251 season_number = int(m_episode.group('season'))
2252 episode_number = int(m_episode.group('episode'))
2254 series = season_number = episode_number = None
2256 m_cat_container = self._search_regex(
2257 r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
2258 video_webpage, 'categories', default=None)
2260 category = self._html_search_regex(
2261 r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
2263 video_categories = None if category is None else [category]
2265 video_categories = None
2268 unescapeHTML(m.group('content'))
2269 for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
def _extract_count(count_name):
    """Look up the number displayed on the ``-<count_name>-button`` element.

    ``count_name`` is regex-escaped before being placed in the pattern.
    The captured run of digits and commas (or the ``default=None`` of the
    search) is handed to ``str_to_int``.
    """
    button_re = (
        r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>'
        % re.escape(count_name))
    raw_count = self._search_regex(
        button_re, video_webpage, count_name, default=None)
    return str_to_int(raw_count)
2277 like_count = _extract_count('like')
2278 dislike_count = _extract_count('dislike')
2280 if view_count is None:
2281 view_count = str_to_int(self._search_regex(
2282 r'<[^>]+class=["\']watch-view-count[^>]+>\s*([\d,\s]+)', video_webpage,
2283 'view count', default=None))
2286 float_or_none(video_details.get('averageRating'))
2287 or try_get(video_info, lambda x: float_or_none(x['avg_rating'][0])))
2290 video_subtitles = self.extract_subtitles(video_id, video_webpage)
2291 automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
2293 video_duration = try_get(
2294 video_info, lambda x: int_or_none(x['length_seconds'][0]))
2295 if not video_duration:
2296 video_duration = int_or_none(video_details.get('lengthSeconds'))
2297 if not video_duration:
2298 video_duration = parse_duration(self._html_search_meta(
2299 'duration', video_webpage, 'video duration'))
2302 video_annotations = None
2303 if self._downloader.params.get('writeannotations', False):
2304 xsrf_token = self._search_regex(
2305 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>[A-Za-z0-9+/=]+)\2',
2306 video_webpage, 'xsrf token', group='xsrf_token', fatal=False)
2307 invideo_url = try_get(
2308 player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
2309 if xsrf_token and invideo_url:
2310 xsrf_field_name = self._search_regex(
2311 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
2312 video_webpage, 'xsrf field name',
2313 group='xsrf_field_name', default='session_token')
2314 video_annotations = self._download_webpage(
2315 self._proto_relative_url(invideo_url),
2316 video_id, note='Downloading annotations',
2317 errnote='Unable to download video annotations', fatal=False,
2318 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
2320 chapters = self._extract_chapters(description_original, video_duration)
2322 # Look for the DASH manifest
2323 if self._downloader.params.get('youtube_include_dash_manifest', True):
2324 dash_mpd_fatal = True
2325 for mpd_url in dash_mpds:
def decrypt_sig(mobj):
    """``re.sub`` callback: replace an encrypted ``/s/<sig>`` path segment.

    ``mobj`` is the match produced by the ``/s/([a-fA-F0-9\\.]+)`` pattern
    this function is passed to; the captured signature is decrypted with
    ``self._decrypt_signature`` and returned as ``/signature/<decrypted>``.
    """
    # NOTE(review): this binding was absent from the received text (its
    # short lines were dropped). group(1) is the only capture group of the
    # pattern used with this callback -- confirm against upstream.
    s = mobj.group(1)
    dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
    return '/signature/%s' % dec_s
2333 mpd_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, mpd_url)
2335 for df in self._extract_mpd_formats(
2336 mpd_url, video_id, fatal=dash_mpd_fatal,
2337 formats_dict=self._formats):
2338 if not df.get('filesize'):
2339 df['filesize'] = _extract_filesize(df['url'])
2340 # Do not overwrite DASH format found in some previous DASH manifest
2341 if df['format_id'] not in dash_formats:
2342 dash_formats[df['format_id']] = df
2343 # Additional DASH manifests may end up in HTTP Error 403 therefore
2344 # allow them to fail without bug report message if we already have
2345 # some DASH manifest succeeded. This is temporary workaround to reduce
2346 # burst of bug reports until we figure out the reason and whether it
2347 # can be fixed at all.
2348 dash_mpd_fatal = False
2349 except (ExtractorError, KeyError) as e:
2350 self.report_warning(
2351 'Skipping DASH manifest: %r' % e, video_id)
2353 # Remove the formats we found through non-DASH, they
2354 # contain less info and it can be wrong, because we use
2355 # fixed values (for example the resolution). See
2356 # https://github.com/ytdl-org/youtube-dl/issues/5774 for an
2358 formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
2359 formats.extend(dash_formats.values())
2361 # Check for malformed aspect ratio
2362 stretched_m = re.search(
2363 r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
2366 w = float(stretched_m.group('w'))
2367 h = float(stretched_m.group('h'))
2368 # yt:stretch may hold invalid ratio data (e.g. for Q39EVAstoRM ratio is 17:0).
2369 # We will only process correct ratios.
2373 if f.get('vcodec') != 'none':
2374 f['stretched_ratio'] = ratio
2377 if 'reason' in video_info:
2378 if 'The uploader has not made this video available in your country.' in video_info['reason']:
2379 regions_allowed = self._html_search_meta(
2380 'regionsAllowed', video_webpage, default=None)
2381 countries = regions_allowed.split(',') if regions_allowed else None
2382 self.raise_geo_restricted(
2383 msg=video_info['reason'][0], countries=countries)
2384 reason = video_info['reason'][0]
2385 if 'Invalid parameters' in reason:
2386 unavailable_message = extract_unavailable_message()
2387 if unavailable_message:
2388 reason = unavailable_message
2389 raise ExtractorError(
2390 'YouTube said: %s' % reason,
2391 expected=True, video_id=video_id)
2392 if video_info.get('license_info') or try_get(player_response, lambda x: x['streamingData']['licenseInfos']):
2393 raise ExtractorError('This video is DRM protected.', expected=True)
2395 self._sort_formats(formats)
2397 self.mark_watched(video_id, video_info, player_response)
2401 'uploader': video_uploader,
2402 'uploader_id': video_uploader_id,
2403 'uploader_url': video_uploader_url,
2404 'channel_id': channel_id,
2405 'channel_url': channel_url,
2406 'upload_date': upload_date,
2407 'license': video_license,
2408 'creator': video_creator or artist,
2409 'title': video_title,
2410 'alt_title': video_alt_title or track,
2411 'thumbnail': video_thumbnail,
2412 'description': video_description,
2413 'categories': video_categories,
2415 'subtitles': video_subtitles,
2416 'automatic_captions': automatic_captions,
2417 'duration': video_duration,
2418 'age_limit': 18 if age_gate else 0,
2419 'annotations': video_annotations,
2420 'chapters': chapters,
2421 'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
2422 'view_count': view_count,
2423 'like_count': like_count,
2424 'dislike_count': dislike_count,
2425 'average_rating': average_rating,
2428 'start_time': start_time,
2429 'end_time': end_time,
2431 'season_number': season_number,
2432 'episode_number': episode_number,
2436 'release_date': release_date,
2437 'release_year': release_year,
2441 class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
2442 IE_DESC = 'YouTube.com playlists'
2443 _VALID_URL = r"""(?x)(?:
2448 youtube(?:kids)?\.com|
2453 (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/(?:videoseries|[0-9A-Za-z_-]{11}))
2454 \? (?:.*?[&;])*? (?:p|a|list)=
2457 youtu\.be/[0-9A-Za-z_-]{11}\?.*?\blist=
2460 (?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)?[0-9A-Za-z-_]{10,}
2461 # Top tracks, they can also include dots
2467 )""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
2468 _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
2469 _VIDEO_RE_TPL = r'href="\s*/watch\?v=%s(?:&(?:[^"]*?index=(?P<index>\d+))?(?:[^>]+>(?P<title>[^<]+))?)?'
2470 _VIDEO_RE = _VIDEO_RE_TPL % r'(?P<id>[0-9A-Za-z_-]{11})'
2471 IE_NAME = 'youtube:playlist'
2473 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
2475 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2476 'uploader': 'Sergey M.',
2477 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
2478 'title': 'youtube-dl public playlist',
2480 'playlist_count': 1,
2482 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
2484 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2485 'uploader': 'Sergey M.',
2486 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
2487 'title': 'youtube-dl empty playlist',
2489 'playlist_count': 0,
2491 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
2492 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2494 'title': '29C3: Not my department',
2495 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2496 'uploader': 'Christiaan008',
2497 'uploader_id': 'ChRiStIaAn008',
2499 'playlist_count': 96,
2501 'note': 'issue #673',
2502 'url': 'PLBB231211A4F62143',
2504 'title': '[OLD]Team Fortress 2 (Class-based LP)',
2505 'id': 'PLBB231211A4F62143',
2506 'uploader': 'Wickydoo',
2507 'uploader_id': 'Wickydoo',
2509 'playlist_mincount': 26,
2511 'note': 'Large playlist',
2512 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
2514 'title': 'Uploads from Cauchemar',
2515 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
2516 'uploader': 'Cauchemar',
2517 'uploader_id': 'Cauchemar89',
2519 'playlist_mincount': 799,
2521 'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
2523 'title': 'YDL_safe_search',
2524 'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
2526 'playlist_count': 2,
2527 'skip': 'This playlist is private',
2530 'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
2531 'playlist_count': 4,
2534 'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
2535 'uploader': 'milan',
2536 'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
2539 'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
2540 'playlist_mincount': 485,
2542 'title': '2018 Chinese New Singles (11/6 updated)',
2543 'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
2545 'uploader_id': 'sdragonfang',
2548 'note': 'Embedded SWF player',
2549 'url': 'https://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
2550 'playlist_count': 4,
2553 'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ',
2555 'skip': 'This playlist does not exist',
2557 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
2558 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
2560 'title': 'Uploads from Interstellar Movie',
2561 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
2562 'uploader': 'Interstellar Movie',
2563 'uploader_id': 'InterstellarMovie1',
2565 'playlist_mincount': 21,
2567 # Playlist URL that does not actually serve a playlist
2568 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
2570 'id': 'FqZTN594JQw',
2572 'title': "Smiley's People 01 detective, Adventure Series, Action",
2573 'uploader': 'STREEM',
2574 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
2575 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
2576 'upload_date': '20150526',
2577 'license': 'Standard YouTube License',
2578 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
2579 'categories': ['People & Blogs'],
2583 'dislike_count': int,
2586 'skip_download': True,
2588 'skip': 'This video is not available.',
2589 'add_ie': [YoutubeIE.ie_key()],
2591 'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
2593 'id': 'yeWKywCrFtk',
2595 'title': 'Small Scale Baler and Braiding Rugs',
2596 'uploader': 'Backus-Page House Museum',
2597 'uploader_id': 'backuspagemuseum',
2598 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
2599 'upload_date': '20161008',
2600 'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
2601 'categories': ['Nonprofits & Activism'],
2604 'dislike_count': int,
2608 'skip_download': True,
2611 # https://github.com/ytdl-org/youtube-dl/issues/21844
2612 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2614 'title': 'Data Analysis with Dr Mike Pound',
2615 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2616 'uploader_id': 'Computerphile',
2617 'uploader': 'Computerphile',
2619 'playlist_mincount': 11,
2621 'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
2622 'only_matching': True,
2624 'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
2625 'only_matching': True,
2627 # music album playlist
2628 'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
2629 'only_matching': True,
2631 'url': 'https://invidio.us/playlist?list=PLDIoUOhQQPlXr63I_vwF9GD8sAKh77dWU',
2632 'only_matching': True,
2634 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
2635 'only_matching': True,
2638 def _real_initialize(self):
def extract_videos_from_page(self, page):
    """Collect the video ids and titles referenced by ``page``.

    Several passes feed the same two parallel lists:

    1. every tag carrying an 11-character ``data-video-id`` attribute
       (title taken from its ``data-title`` attribute, stripped when
       non-empty);
    2. the class's ``_VIDEO_RE``, via ``extract_videos_from_page_impl``;
    3. two looser patterns (``href="/watch?v=..."`` and
       ``data-video-ids="..."``), also via the ``_impl`` helper.

    Returns ``zip(ids, titles)``.
    """
    # NOTE(review): the two list initialisations and the ``if video_title``
    # guard were absent from the received text (its short lines were
    # dropped); they are reconstructed from how the names are used below --
    # confirm against upstream.
    ids_in_page = []
    titles_in_page = []

    for item in re.findall(
            r'(<[^>]*\bdata-video-id\s*=\s*["\'][0-9A-Za-z_-]{11}[^>]+>)', page):
        attrs = extract_attributes(item)
        video_id = attrs['data-video-id']
        # ``data-title`` is optional, hence .get() and the guard before strip()
        video_title = unescapeHTML(attrs.get('data-title'))
        if video_title:
            video_title = video_title.strip()
        ids_in_page.append(video_id)
        titles_in_page.append(video_title)

    # Fallback with old _VIDEO_RE
    self.extract_videos_from_page_impl(
        self._VIDEO_RE, page, ids_in_page, titles_in_page)

    # Looser fallbacks that only capture an id
    self.extract_videos_from_page_impl(
        r'href="\s*/watch\?v\s*=\s*(?P<id>[0-9A-Za-z_-]{11})', page,
        ids_in_page, titles_in_page)
    self.extract_videos_from_page_impl(
        r'data-video-ids\s*=\s*["\'](?P<id>[0-9A-Za-z_-]{11})', page,
        ids_in_page, titles_in_page)

    return zip(ids_in_page, titles_in_page)
def _extract_mix(self, playlist_id):
    """Extract a YouTube mix by walking it one watch page at a time.

    Each iteration downloads the watch page of ``last_id`` within the mix
    and collects the ids linked from it. ``last_id`` starts as the final
    11 characters of ``playlist_id`` and then becomes the most recently
    collected id. The walk stops once a page contributes no id that has
    not been seen already.

    Returns ``playlist_result`` over the collected ids, titled with the
    first non-empty element among the ``playlist-title``,
    ``title long-title`` and ``title`` classes of the last page fetched.
    """
    # The mixes are generated from a single video
    # the id of the playlist is just 'RD' + video_id
    # NOTE(review): ``ids = []``, the ``webpage))`` argument closing the
    # findall call, the loop exit/accumulation statements and the
    # ``title_span = (`` opener were absent from the received text (its
    # short lines were dropped). They are reconstructed from the
    # surrounding uses and comments -- confirm against upstream.
    ids = []
    last_id = playlist_id[-11:]
    for n in itertools.count(1):
        url = 'https://youtube.com/watch?v=%s&list=%s' % (last_id, playlist_id)
        webpage = self._download_webpage(
            url, playlist_id, 'Downloading page {0} of Youtube mix'.format(n))
        new_ids = orderedSet(re.findall(
            r'''(?xs)data-video-username=".*?".*?
                       href="/watch\?v=([0-9A-Za-z_-]{11})&[^"]*?list=%s''' % re.escape(playlist_id),
            webpage))
        # Fetch new pages until all the videos are repeated, it seems that
        # there are always 51 unique videos.
        new_ids = [_id for _id in new_ids if _id not in ids]
        if not new_ids:
            break
        ids.extend(new_ids)
        last_id = ids[-1]

    url_results = self._ids_to_results(ids)

    # ``webpage`` is the last page downloaded by the loop above
    search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage)
    title_span = (
        search_title('playlist-title')
        or search_title('title long-title')
        or search_title('title'))
    title = clean_html(title_span)

    return self.playlist_result(url_results, playlist_id, title)
def _extract_playlist(self, playlist_id):
    """Download a playlist page and build the playlist result.

    Alert boxes (``yt-alert-message``) on the page are inspected first:

    * "does not exist" / "is private" raises ExtractorError
      (``expected=True``);
    * "Invalid parameters" raises ExtractorError (``expected=True``);
    * "Choose your language" is ignored;
    * anything else is only reported as a warning.

    Returns a ``(has_videos, playlist)`` tuple. ``playlist`` is the
    ``playlist_result`` dict extended with ``uploader``, ``uploader_id``
    and ``uploader_url``. ``has_videos`` is False only when the page has
    no title and ``self._entries`` yields nothing.
    """
    # NOTE(review): the received text had lost its indentation and its
    # shortest lines. The ``if mobj:`` / ``else:`` / ``try:`` lines, the
    # ``continue``, the ``expected=True)`` argument, the ``re.search(`` and
    # ``playlist.update({`` openers, the ``has_videos`` assignments and the
    # ``message += '.'`` line are reconstructed -- confirm against
    # upstream, the last one in particular since it is a runtime string.
    url = self._TEMPLATE_URL % playlist_id
    page = self._download_webpage(url, playlist_id)

    # the yt-alert-message now has tabindex attribute (see https://github.com/ytdl-org/youtube-dl/issues/11604)
    for match in re.findall(r'<div class="yt-alert-message"[^>]*>([^<]+)</div>', page):
        match = match.strip()
        # Check if the playlist exists or is private
        mobj = re.match(r'[^<]*(?:The|This) playlist (?P<reason>does not exist|is private)[^<]*', match)
        if mobj:
            reason = mobj.group('reason')
            message = 'This playlist %s' % reason
            if 'private' in reason:
                message += ', use --username or --netrc to access it'
            message += '.'
            raise ExtractorError(message, expected=True)
        elif re.match(r'[^<]*Invalid parameters[^<]*', match):
            raise ExtractorError(
                'Invalid parameters. Maybe URL is incorrect.',
                expected=True)
        elif re.match(r'[^<]*Choose your language[^<]*', match):
            continue
        else:
            self.report_warning('Youtube gives an alert message: ' + match)

    playlist_title = self._html_search_regex(
        r'(?s)<h1 class="pl-header-title[^"]*"[^>]*>\s*(.*?)\s*</h1>',
        page, 'title', default=None)

    # Shared prefix: the first link inside the playlist header details list
    _UPLOADER_BASE = r'class=["\']pl-header-details[^>]+>\s*<li>\s*<a[^>]+\bhref='
    uploader = self._html_search_regex(
        r'%s["\']/(?:user|channel)/[^>]+>([^<]+)' % _UPLOADER_BASE,
        page, 'uploader', default=None)
    mobj = re.search(
        r'%s(["\'])(?P<path>/(?:user|channel)/(?P<uploader_id>.+?))\1' % _UPLOADER_BASE,
        page)
    if mobj:
        uploader_id = mobj.group('uploader_id')
        uploader_url = compat_urlparse.urljoin(url, mobj.group('path'))
    else:
        uploader_id = uploader_url = None

    has_videos = True

    if not playlist_title:
        try:
            # Some playlist URLs don't actually serve a playlist (e.g.
            # https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4)
            next(self._entries(page, playlist_id))
        except StopIteration:
            has_videos = False

    playlist = self.playlist_result(
        self._entries(page, playlist_id), playlist_id, playlist_title)
    playlist.update({
        'uploader': uploader,
        'uploader_id': uploader_id,
        'uploader_url': uploader_url,
    })

    return has_videos, playlist
def _check_download_just_video(self, url, playlist_id):
    """Decide whether a playlist URL should yield only its single video.

    The video id is taken from the ``v`` query parameter or, failing that,
    from a ``youtu.be/<id>`` or ``youtube.com/embed/<id>`` URL (the pattern
    excludes ``embed/videoseries``).

    Returns a ``(video_id, result)`` pair:

    * id found and the ``noplaylist`` downloader param set: the id and a
      ``url_result`` for the 'Youtube' extractor;
    * id found otherwise: the id and None, after printing a hint about
      ``--no-playlist``;
    * no id found: ``(None, None)``.
    """
    # Check if it's a video-specific URL
    query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
    video_id = query_dict.get('v', [None])[0] or self._search_regex(
        r'(?:(?:^|//)youtu\.be/|youtube\.com/embed/(?!videoseries))([0-9A-Za-z_-]{11})', url,
        'video id', default=None)
    # NOTE(review): the ``if video_id:`` guard and the final
    # ``return None, None`` were absent from the received text (its short
    # lines were dropped). They are reconstructed from the caller, which
    # always unpacks two values and tests ``video_id`` for falsiness --
    # confirm against upstream.
    if video_id:
        if self._downloader.params.get('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
            return video_id, self.url_result(video_id, 'Youtube', video_id=video_id)
        self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
        return video_id, None
    return None, None
def _real_extract(self, url):
    """Extract a playlist, a mix, or a lone video from ``url``.

    Order of decisions:

    1. ``url`` must match ``_VALID_URL`` (else ExtractorError); the
       playlist id is capture group 1 or, failing that, group 2.
    2. If ``_check_download_just_video`` hands back a result, that single
       video is returned.
    3. Ids starting with ``RD``, ``UL`` or ``PU`` go through
       ``_extract_mix``.
    4. Otherwise ``_extract_playlist`` is used; its playlist is returned
       when it has videos or when there is no video id to fall back on.
    5. Otherwise the plain video is returned.
    """
    # NOTE(review): ``if mobj is None:``, ``if video: return video`` and
    # ``return playlist`` were absent from the received text (its short
    # lines were dropped) and are reconstructed -- confirm against
    # upstream. Without the first guard the raise would be unconditional.
    # Extract playlist id
    mobj = re.match(self._VALID_URL, url)
    if mobj is None:
        raise ExtractorError('Invalid URL: %s' % url)
    playlist_id = mobj.group(1) or mobj.group(2)

    video_id, video = self._check_download_just_video(url, playlist_id)
    if video:
        return video

    if playlist_id.startswith(('RD', 'UL', 'PU')):
        # Mixes require a custom extraction process
        return self._extract_mix(playlist_id)

    has_videos, playlist = self._extract_playlist(playlist_id)
    if has_videos or not video_id:
        return playlist

    # Some playlist URLs don't actually serve a playlist (see
    # https://github.com/ytdl-org/youtube-dl/issues/10537).
    # Fallback to plain video extraction if there is a video id
    # along with playlist id.
    return self.url_result(video_id, 'Youtube', video_id=video_id)
2804 class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
2805 IE_DESC = 'YouTube.com channels'
2806 _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie|kids)?\.com|(?:www\.)?invidio\.us)/channel/(?P<id>[0-9A-Za-z_-]+)'
2807 _TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos'
2808 _VIDEO_RE = r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?'
2809 IE_NAME = 'youtube:channel'
2811 'note': 'paginated channel',
2812 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
2813 'playlist_mincount': 91,
2815 'id': 'UUKfVa3S1e4PHvxWcwyMMg8w',
2816 'title': 'Uploads from lex will',
2817 'uploader': 'lex will',
2818 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2821 'note': 'Age restricted channel',
2822 # from https://www.youtube.com/user/DeusExOfficial
2823 'url': 'https://www.youtube.com/channel/UCs0ifCMCm1icqRbqhUINa0w',
2824 'playlist_mincount': 64,
2826 'id': 'UUs0ifCMCm1icqRbqhUINa0w',
2827 'title': 'Uploads from Deus Ex',
2828 'uploader': 'Deus Ex',
2829 'uploader_id': 'DeusExOfficial',
2832 'url': 'https://invidio.us/channel/UC23qupoDRn9YOAVzeoxjOQA',
2833 'only_matching': True,
2835 'url': 'https://www.youtubekids.com/channel/UCyu8StPfZWapR6rfW_JgqcA',
2836 'only_matching': True,
# NOTE(review): the decorator line was absent from the received text (its
# short lines were dropped). It is reconstructed because the method takes
# ``cls``, passes it to ``super()``, and same-named methods are called
# directly on classes -- confirm against upstream.
@classmethod
def suitable(cls, url):
    """Return False for URLs that YoutubePlaylistsIE or YoutubeLiveIE accept.

    Any other URL is judged by the inherited ``suitable``.
    """
    if YoutubePlaylistsIE.suitable(url) or YoutubeLiveIE.suitable(url):
        return False
    return super(YoutubeChannelIE, cls).suitable(url)
2844 def _build_template_url(self, url, channel_id):
2845 return self._TEMPLATE_URL % channel_id
def _real_extract(self, url):
    """Extract the videos of a channel.

    1. The channel's video page is requested with ``?view=57``
       (non-fatal) to discover a channel id, from the ``channelId`` meta
       tag or from a ``vnd.youtube://user/<id>`` app URL. An id starting
       with ``UC`` is turned into the ``UU...`` playlist id and delegated
       to the 'YoutubePlaylist' extractor.
    2. Otherwise the plain page is downloaded. For an auto-generated
       channel the videos are read directly from that single page.
    3. Otherwise the paged ``self._entries`` listing is used. If it yields
       nothing and the page shows a ``yt-alert-message``, that message is
       raised as an ExtractorError (``expected=True``).
    """
    # NOTE(review): the received text had lost its indentation and its
    # shortest lines. ``else:``, ``if channel_url:``, ``if autogenerated:``,
    # ``try:``, ``if alert_message:``, the ``entries = [self.url_result(``
    # opener and the first line of the verbose regex (``class="[^"]*?(?:``)
    # are reconstructed -- confirm against upstream, the regex line in
    # particular since it is a runtime string.
    channel_id = self._match_id(url)

    url = self._build_template_url(url, channel_id)

    # Channel by page listing is restricted to 35 pages of 30 items, i.e. 1050 videos total (see #5778)
    # Workaround by extracting as a playlist if managed to obtain channel playlist URL
    # otherwise fallback on channel by page extraction
    channel_page = self._download_webpage(
        url + '?view=57', channel_id,
        'Downloading channel page', fatal=False)
    if channel_page is False:
        channel_playlist_id = False
    else:
        channel_playlist_id = self._html_search_meta(
            'channelId', channel_page, 'channel id', default=None)
        if not channel_playlist_id:
            channel_url = self._html_search_meta(
                ('al:ios:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad'),
                channel_page, 'channel url', default=None)
            if channel_url:
                channel_playlist_id = self._search_regex(
                    r'vnd\.youtube://user/([0-9A-Za-z_-]+)',
                    channel_url, 'channel id', default=None)
    if channel_playlist_id and channel_playlist_id.startswith('UC'):
        playlist_id = 'UU' + channel_playlist_id[2:]
        return self.url_result(
            compat_urlparse.urljoin(url, '/playlist?list=%s' % playlist_id), 'YoutubePlaylist')

    channel_page = self._download_webpage(url, channel_id, 'Downloading page #1')
    autogenerated = re.search(r'''(?x)
            class="[^"]*?(?:
                channel-header-autogenerated-label|
                yt-channel-title-autogenerated
            )[^"]*"''', channel_page) is not None

    if autogenerated:
        # The videos are contained in a single page
        # the ajax pages can't be used, they are empty
        entries = [
            self.url_result(
                video_id, 'Youtube', video_id=video_id,
                video_title=video_title)
            for video_id, video_title in self.extract_videos_from_page(channel_page)]
        return self.playlist_result(entries, channel_id)

    try:
        # Probe for at least one entry before committing to the listing
        next(self._entries(channel_page, channel_id))
    except StopIteration:
        alert_message = self._html_search_regex(
            r'(?s)<div[^>]+class=(["\']).*?\byt-alert-message\b.*?\1[^>]*>(?P<alert>[^<]+)</div>',
            channel_page, 'alert', default=None, group='alert')
        if alert_message:
            raise ExtractorError('Youtube said: %s' % alert_message, expected=True)

    return self.playlist_result(self._entries(channel_page, channel_id), channel_id)
class YoutubeUserIE(YoutubeChannelIE):
    """Extractor for the videos of a YouTube user or custom-URL channel.

    Matches /user/<name>, /c/<name> and bare /<name> URLs as well as the
    "ytuser:<name>" keyword form.
    """
    IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:(?P<user>user|c)/)?(?!(?:attribution_link|watch|results|shared)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/%s/%s/videos'
    IE_NAME = 'youtube:user'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheLinuxFoundation',
        'playlist_mincount': 320,
        'info_dict': {
            'id': 'UUfX55Sx5hEFjoC3cNs6mCUQ',
            'title': 'Uploads from The Linux Foundation',
            'uploader': 'The Linux Foundation',
            'uploader_id': 'TheLinuxFoundation',
        },
    }, {
        # Only available via https://www.youtube.com/c/12minuteathlete/videos
        # but not https://www.youtube.com/user/12minuteathlete/videos
        'url': 'https://www.youtube.com/c/12minuteathlete/videos',
        'playlist_mincount': 249,
        'info_dict': {
            'id': 'UUVjM-zV6_opMDx7WYxnjZiQ',
            'title': 'Uploads from 12 Minute Athlete',
            'uploader': '12 Minute Athlete',
            'uploader_id': 'the12minuteathlete',
        },
    }, {
        'url': 'ytuser:phihag',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/gametrailers',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/gametrailers',
        'only_matching': True,
    }, {
        # This channel is not available, geo restricted to JP
        'url': 'https://www.youtube.com/user/kananishinoSMEJ/videos',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Return True only if no other Youtube*IE in this module accepts url."""
        # The pattern above is too permissive and would also match URLs that
        # belong to other YouTube extractors, so give each of them precedence.
        for name, klass in globals().items():
            if not (name.startswith('Youtube') and name.endswith('IE')):
                continue
            if klass is not cls and klass.suitable(url):
                return False
        return super(YoutubeUserIE, cls).suitable(url)

    def _build_template_url(self, url, channel_id):
        """Return the "videos" page URL for the user/custom name found in url."""
        match = re.match(self._VALID_URL, url)
        # When no "user"/"c" path prefix was captured, fall back to "user"
        prefix = match.group('user') or 'user'
        return self._TEMPLATE_URL % (prefix, match.group('id'))
class YoutubeLiveIE(YoutubeBaseInfoExtractor):
    """Resolve a channel's "/live" URL to the video it currently shows."""
    IE_DESC = 'YouTube.com live streams'
    _VALID_URL = r'(?P<base_url>https?://(?:\w+\.)?youtube\.com/(?:(?:user|channel|c)/)?(?P<id>[^/]+))/live'
    IE_NAME = 'youtube:live'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheYoungTurks/live',
        'info_dict': {
            'id': 'a48o2S1cPoo',
            'ext': 'mp4',
            'title': 'The Young Turks - Live Main Show',
            'uploader': 'The Young Turks',
            'uploader_id': 'TheYoungTurks',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
            'upload_date': '20150715',
            'license': 'Standard YouTube License',
            'description': 'md5:438179573adcdff3c97ebb1ee632b891',
            'categories': ['News & Politics'],
            'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/TheYoungTurks/live',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to the video named by the page, else to the channel URL.

        The page is downloaded non-fatally; whenever it is unavailable or
        does not describe a video with a well-formed 11-character id, the
        URL without the trailing "/live" is handed on instead.
        """
        match = re.match(self._VALID_URL, url)
        channel_id = match.group('id')
        base_url = match.group('base_url')

        webpage = self._download_webpage(url, channel_id, fatal=False)
        if not webpage:
            return self.url_result(base_url)

        page_type = self._og_search_property(
            'type', webpage, 'page type', default='')
        video_id = self._html_search_meta(
            'videoId', webpage, 'video id', default=None)

        if (page_type.startswith('video') and video_id
                and re.match(r'^[0-9A-Za-z_-]{11}$', video_id)):
            return self.url_result(video_id, YoutubeIE.ie_key())
        return self.url_result(base_url)
class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
    # Matches the "/playlists" tab of a user or channel; this class only
    # declares attributes, the extraction logic is inherited from the base.
    IE_NAME = 'youtube:playlists'
    IE_DESC = 'YouTube.com user/channel playlists'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/(?:user|channel)/(?P<id>[^/]+)/playlists'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
        'playlist_mincount': 4,
        'info_dict': {
            'id': 'ThirstForScience',
            'title': 'ThirstForScience',
        },
    }, {
        # with "Load more" button
        'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
        'playlist_mincount': 70,
        'info_dict': {
            'id': 'igorkle1',
            'title': 'Игорь Клейнер',
        },
    }, {
        'url': 'https://www.youtube.com/channel/UCiU1dHvZObB2iP6xkJ__Icw/playlists',
        'playlist_mincount': 17,
        'info_dict': {
            'id': 'UCiU1dHvZObB2iP6xkJ__Icw',
            'title': 'Chem Player',
        },
        'skip': 'Blocked',
    }]
class YoutubeSearchBaseInfoExtractor(YoutubePlaylistBaseInfoExtractor):
    # Common base of the search extractors (keyword search and search URLs).
    # The pattern captures the 11-character video id of a "/watch?v=" link
    # and, optionally, the title attribute that follows it in the same tag.
    # NOTE(review): presumably consumed by the inherited _process_page
    # helper -- confirm against the playlist base class.
    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})(?:[^"]*"[^>]+\btitle="(?P<title>[^"]+))?'
class YoutubeSearchIE(SearchInfoExtractor, YoutubeSearchBaseInfoExtractor):
    """Keyword search extractor that walks the HTML result pages."""
    IE_DESC = 'YouTube.com searches'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    # Additional query-string parameters merged into the first request;
    # subclasses override this to change e.g. the sort order.
    _EXTRA_QUERY_ARGS = {}
    _TESTS = []

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query.

        Result pages are downloaded one after another, following the "Next"
        link, until at least n videos have been collected, a page yields no
        videos, or no further page is linked.

        query -- the search terms
        n     -- maximum number of results (may be float('inf'))

        Returns a playlist result titled with the query that holds at most
        n entries.
        Raises ExtractorError when a result page contains a "search-message"
        element, i.e. the site reports that there are no video results.
        """
        videos = []

        url_query = {
            'search_query': query.encode('utf-8'),
        }
        url_query.update(self._EXTRA_QUERY_ARGS)
        result_url = 'https://www.youtube.com/results?' + compat_urllib_parse_urlencode(url_query)

        for pagenum in itertools.count(1):
            data = self._download_json(
                result_url, video_id='query "%s"' % query,
                note='Downloading page %s' % pagenum,
                errnote='Unable to download API page',
                query={'spf': 'navigate'})
            html_content = data[1]['body']['content']

            if 'class="search-message' in html_content:
                raise ExtractorError(
                    '[youtube] No video results', expected=True)

            new_videos = list(self._process_page(html_content))
            videos += new_videos
            # Stop as soon as enough results are available. Using ">=" (not
            # ">") avoids downloading one more page when exactly n results
            # have already been collected.
            if not new_videos or len(videos) >= n:
                break
            next_link = self._html_search_regex(
                r'href="(/results\?[^"]*\bsp=[^"]+)"[^>]*>\s*<span[^>]+class="[^"]*\byt-uix-button-content\b[^"]*"[^>]*>Next',
                html_content, 'next link', default=None)
            if next_link is None:
                break
            result_url = compat_urlparse.urljoin('https://www.youtube.com/', next_link)

        # Only slice when necessary: n may be float('inf'), which is not a
        # valid slice bound.
        if len(videos) > n:
            videos = videos[:n]
        return self.playlist_result(videos, query)
class YoutubeSearchDateIE(YoutubeSearchIE):
    # Variant of YoutubeSearchIE that differs only in its search keyword and
    # in the extra query argument below, which YoutubeSearchIE merges into
    # the results URL so that the newest videos are listed first.
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    _SEARCH_KEY = 'ytsearchdate'
    IE_DESC = 'YouTube.com searches, newest videos first'
    _EXTRA_QUERY_ARGS = {'search_sort': 'video_date_uploaded'}
class YoutubeSearchURLIE(YoutubeSearchBaseInfoExtractor):
    """Extract the videos listed on a YouTube search results URL."""
    IE_DESC = 'YouTube.com search URLs'
    IE_NAME = 'youtube:search_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        },
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a playlist of the videos found on the given results page.

        The decoded search terms serve both as the download id and as the
        playlist title.
        """
        raw_query = re.match(self._VALID_URL, url).group('query')
        query = compat_urllib_parse_unquote_plus(raw_query)
        webpage = self._download_webpage(url, query)
        entries = self._process_page(webpage)
        return self.playlist_result(entries, playlist_title=query)
class YoutubeShowIE(YoutubePlaylistsBaseInfoExtractor):
    """Extract a (multi-season) show through its "/playlists" page."""
    IE_DESC = 'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/show/(?P<id>[^?#]*)'
    IE_NAME = 'youtube:show'
    _TESTS = [{
        'url': 'https://www.youtube.com/show/airdisasters',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'airdisasters',
            'title': 'Air Disasters',
        },
    }]

    def _real_extract(self, url):
        """Rewrite the show URL to its playlists page and defer to the base class."""
        show_id = self._match_id(url)
        playlists_url = 'https://www.youtube.com/show/%s/playlists' % show_id
        return super(YoutubeShowIE, self)._real_extract(playlists_url)
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
    """
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        """Extractor name derived from the subclass' _FEED_NAME."""
        return 'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        self._login()

    def _entries(self, page):
        """Yield a url result for every distinct video id found in the feed.

        page -- HTML of the first feed page

        Further portions are requested through the "load more" link for as
        long as one is present and the latest portion contributed at least
        one video id that had not been seen before.
        """
        # The extraction process is the same as for playlists, but the regex
        # for the video ids doesn't contain an index
        seen_ids = set()  # a set keeps the duplicate check O(1) per id
        more_widget_html = content_html = page
        for page_num in itertools.count(1):
            matches = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)

            # 'recommended' feed has infinite 'load more' and each new portion spins
            # the same videos in (sometimes) slightly different order, so we'll check
            # for unicity and break when portion has no new videos
            new_ids = [
                video_id for video_id in orderedSet(matches)
                if video_id not in seen_ids]
            if not new_ids:
                break

            seen_ids.update(new_ids)

            for entry in self._ids_to_results(new_ids):
                yield entry

            mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
            if not mobj:
                break

            more = self._download_json(
                'https://youtube.com/%s' % mobj.group('more'), self._PLAYLIST_TITLE,
                'Downloading page #%s' % page_num,
                transform_source=uppercase_escape)
            content_html = more['content_html']
            more_widget_html = more['load_more_widget_html']

    def _real_extract(self, url):
        """Download the feed page and return its videos as a playlist."""
        page = self._download_webpage(
            'https://www.youtube.com/feed/%s' % self._FEED_NAME,
            self._PLAYLIST_TITLE)
        return self.playlist_result(
            self._entries(page), playlist_title=self._PLAYLIST_TITLE)
class YoutubeWatchLaterIE(YoutubePlaylistIE):
    """Extractor for the "watch later" list, i.e. the playlist with id WL."""
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:feed/watch_later|(?:playlist|watch)\?(?:.+&)?list=WL)|:ytwatchlater'

    _TESTS = [{
        'url': 'https://www.youtube.com/playlist?list=WL',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?v=bCNU9TrbiRk&index=1&list=WL',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the single video result if there is one, else the WL playlist."""
        # NOTE(review): _check_download_just_video presumably yields a video
        # result only when just the video was requested -- confirm in the
        # playlist base class.
        _, single_video = self._check_download_just_video(url, 'WL')
        if single_video:
            return single_video
        _, watch_later = self._extract_playlist('WL')
        return watch_later
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Resolve the account's favourites page to its underlying playlist."""
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
    _LOGIN_REQUIRED = True

    def _real_extract(self, url):
        """Find the playlist id on the favourites page and delegate to it."""
        favourites_page = self._download_webpage(
            'https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        # The first "list=<id>" reference on the page names the playlist
        list_id = self._search_regex(
            r'list=(.+?)["&]', favourites_page, 'favourites playlist id')
        return self.url_result(list_id, 'YoutubePlaylist')
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    # Feed extractor for https://www.youtube.com/feed/recommended.
    # _FEED_NAME is inserted into the feed URL and into IE_NAME by the base
    # class; _PLAYLIST_TITLE becomes the title of the resulting playlist.
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/recommended|:ytrec(?:ommended)?'
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = 'Youtube Recommended videos'
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    # Feed extractor for https://www.youtube.com/feed/subscriptions.
    # _FEED_NAME is inserted into the feed URL and into IE_NAME by the base
    # class; _PLAYLIST_TITLE becomes the title of the resulting playlist.
    IE_DESC = 'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = 'Youtube Subscriptions'
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    # Feed extractor for https://www.youtube.com/feed/history.
    # _FEED_NAME is inserted into the feed URL and into IE_NAME by the base
    # class; _PLAYLIST_TITLE becomes the title of the resulting playlist.
    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/history|:ythistory'
    _FEED_NAME = 'history'
    _PLAYLIST_TITLE = 'Youtube History'
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch watch URLs that lost their video id and explain the likely cause.

    Such URLs typically result from an unquoted "&" on the command line,
    which makes the shell cut the URL off before the "v=" parameter.
    """
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError with quoting advice."""
        advice = (
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply youtube-dl BaW_jenozKc .')
        raise ExtractorError(advice, expected=True)
3302 class YoutubeTruncatedIDIE(InfoExtractor):
3303 IE_NAME = 'youtube:truncated_id'
3304 IE_DESC = False # Do not list
3305 _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'
3308 'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
3309 'only_matching': True,
3312 def _real_extract(self, url):
3313 video_id = self._match_id(url)
3314 raise ExtractorError(
3315 'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url),