3 from __future__ import unicode_literals
14 from .common import InfoExtractor, SearchInfoExtractor
15 from ..jsinterp import JSInterpreter
16 from ..swfinterp import SWFInterpreter
17 from ..compat import (
22 compat_urllib_parse_unquote,
23 compat_urllib_parse_unquote_plus,
24 compat_urllib_parse_urlencode,
25 compat_urllib_parse_urlparse,
36 get_element_by_attribute,
58 class YoutubeBaseInfoExtractor(InfoExtractor):
59 """Provide base functions for Youtube extractors"""
# Login page; downloaded first so that its hidden form inputs can seed the later sign-in requests.
60 _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
# NOTE(review): not referenced in the visible part of this file — confirm whether it is still needed.
61 _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'
# Endpoint used for the "Looking up account info" step of the login flow.
63 _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
# Endpoint used for the "Logging in" step (the request that carries the password).
64 _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
# Endpoint used to submit the TFA code; {0} is filled with the TL value read from the challenge response.
65 _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'
# presumably the machine name used for the .netrc credential lookup — confirm against InfoExtractor
67 _NETRC_MACHINE = 'youtube'
68 # If True it will raise an error if no login info is provided
69 _LOGIN_REQUIRED = False
# Playlist id pattern: one of the listed prefixes followed by at least 10 id characters.
# It is interpolated into YoutubeIE._VALID_URL.
71 _PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}'
# Passes the domain '.youtube.com', the name 'PREF' and the value 'f1=50000000&hl=en'
# together with an expiry time (presumably to a cookie setter, with hl=en selecting
# English pages — confirm).
73 def _set_language(self):
75 '.youtube.com', 'PREF', 'f1=50000000&hl=en',
76 # YouTube sets the expire time to about two months
# Expiry = now + 2 * 30 days, expressed in seconds.
77 expire_time=time.time() + 2 * 30 * 24 * 3600)
# Builds url_result entries addressed to the 'Youtube' extractor from video ids;
# each id is passed both as the URL argument and as video_id.
79 def _ids_to_results(self, ids):
81 self.url_result(vid_id, 'Youtube', video_id=vid_id)
86 Attempt to log in to YouTube.
87 True is returned if successful or skipped.
88 False is returned if login failed.
90 If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
# Credentials as resolved by _get_login_info().
92 username, password = self._get_login_info()
93 # No authentication to be performed
# Login is mandatory for this extractor and no 'cookiefile' param was supplied:
# fail with an expected (user-facing) error.
95 if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
96 raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
# Step 1: fetch the login page. With fatal=False a failed download is signalled by
# the value False (checked right below) instead of an exception.
99 login_page = self._download_webpage(
100 self._LOGIN_URL, None,
101 note='Downloading login page',
102 errnote='unable to fetch login page', fatal=False)
103 if login_page is False:
# The hidden inputs of the login page are the base payload of every sign-in request.
106 login_form = self._hidden_inputs(login_page)
# Helper performing one sign-in request and returning the parsed JSON reply.
#   url     - endpoint to call
#   f_req   - structure that is JSON-encoded into the 'f.req' form field
#   note    - progress message, errnote - message used on failure
108 def req(url, f_req, note, errnote):
# Work on a copy so the shared login_form is not modified by individual requests.
109 data = login_form.copy()
# Extra form fields sent with each request (presumably merged into data — confirm).
112 'checkConnection': 'youtube',
113 'checkedDomains': 'youtube',
115 'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
116 'f.req': json.dumps(f_req),
117 'flowName': 'GlifWebSignIn',
118 'flowEntry': 'ServiceLogin',
119 # TODO: reverse actual botguard identifier generation algo
120 'bgRequest': '["identifier",""]',
122 return self._download_json(
123 url, None, note=note, errnote=errnote,
# Strip everything before the first '[' (presumably a non-JSON prefix in the reply) before parsing.
124 transform_source=lambda s: re.sub(r'^[^[]*', '', s),
# The payload is sent url-encoded, matching the Content-Type header below.
126 data=urlencode_postdata(data), headers={
127 'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
128 'Google-Accounts-XSRF': 1,
# Problems during login are reported as downloader warnings.
132 self._downloader.report_warning(message)
# Step 2: account lookup (the literals below presumably belong to the lookup_req payload — confirm).
136 None, [], None, 'US', None, None, 2, False, True,
140 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
142 1, [None, None, []], None, None, None, True
147 lookup_results = req(
148 self._LOOKUP_URL, lookup_req,
149 'Looking up account info', 'Unable to look up account info')
151 if lookup_results is False:
# The user hash is read from position [0][2] of the lookup reply.
154 user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
156 warn('Unable to extract user hash')
# Step 3: password submission (the payload embeds the password).
161 None, 1, None, [1, None, None, None, [password, None, True]],
163 None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
164 1, [None, None, []], None, None, None, True
167 challenge_results = req(
168 self._CHALLENGE_URL, challenge_req,
169 'Logging in', 'Unable to log in')
171 if challenge_results is False:
# Position [0][5] of the reply is inspected for a login error; its element [5] is the message code.
174 login_res = try_get(challenge_results, lambda x: x[0][5], list)
176 login_msg = try_get(login_res, lambda x: x[5], compat_str)
# NOTE(review): '%' binds tighter than the conditional expression, so the
# 'Unable to login:' prefix is only applied in the 'Invalid password' case;
# any other code is passed through bare — confirm this is intended.
178 'Unable to login: %s' % 'Invalid password'
179 if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
# The result entry is the last element of the first item of the reply.
182 res = try_get(challenge_results, lambda x: x[0][-1], list)
184 warn('Unable to extract result entry')
# If the result entry describes a further challenge, its kind is the string at index 2.
187 login_challenge = try_get(res, lambda x: x[0][0], list)
189 challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
# Step 4 (two-step verification only): obtain a TFA code and submit it.
190 if challenge_str == 'TWO_STEP_VERIFICATION':
191 # SEND_SUCCESS - TFA code has been successfully sent to phone
192 # QUOTA_EXCEEDED - reached the limit of TFA codes
193 status = try_get(login_challenge, lambda x: x[5], compat_str)
194 if status == 'QUOTA_EXCEEDED':
195 warn('Exceeded the limit of TFA codes, try later')
# TL is the token later substituted into _TFA_URL.
198 tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
200 warn('Unable to extract TL')
203 tfa_code = self._get_tfa_info('2-step verification code')
# NOTE(review): the two adjacent literals below are concatenated without a
# separating space, so the message reads "...--twofactor <code>(Note that...".
207 'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
208 '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
# A leading 'G-' is dropped from the code before it is sent.
211 tfa_code = remove_start(tfa_code, 'G-')
214 user_hash, None, 2, None,
216 9, None, None, None, None, None, None, None,
217 [None, tfa_code, True, 2]
221 self._TFA_URL.format(tl), tfa_req,
222 'Submitting TFA code', 'Unable to submit TFA code')
224 if tfa_results is False:
# Same error layout as for the password step: [0][5] holds the error, its [5] the code.
227 tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
229 tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
# NOTE(review): same precedence quirk as above — only the 'Invalid TFA code'
# case gets the 'Unable to finish TFA:' prefix.
231 'Unable to finish TFA: %s' % 'Invalid TFA code'
232 if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
# After TFA the CheckCookie URL is taken from the TFA reply.
235 check_cookie_url = try_get(
236 tfa_results, lambda x: x[0][-1][2], compat_str)
# Human-readable explanations for other known challenge kinds.
239 'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
240 'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
241 'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
# Unknown challenge kinds fall back to a generic "returned error" message.
243 challenge = CHALLENGES.get(
245 '%s returned error %s.' % (self.IE_NAME, challenge_str))
246 warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
# Here the CheckCookie URL comes from index 2 of the result entry.
249 check_cookie_url = try_get(res, lambda x: x[2], compat_str)
251 if not check_cookie_url:
252 warn('Unable to extract CheckCookie URL')
# Final step: request the CheckCookie URL (non-fatal download).
255 check_cookie_results = self._download_webpage(
256 check_cookie_url, None, 'Checking cookie', fatal=False)
258 if check_cookie_results is False:
# The login counts as successful only if the returned page mentions the myaccount URL.
261 if 'https://myaccount.google.com/' not in check_cookie_results:
262 warn('Unable to log in')
def _download_webpage_handle(self, *args, **kwargs):
    """Delegate to the parent downloader with ``disable_polymer=true`` in the query.

    All positional and keyword arguments are forwarded unchanged, except that
    the ``query`` keyword always ends up containing ``disable_polymer`` set to
    ``'true'`` (presumably to request the pre-Polymer page layout — confirm).

    Returns whatever the parent class's ``_download_webpage_handle`` returns.
    """
    # Copy first so a query mapping owned by the caller is never modified.
    polymer_off_query = kwargs.get('query', {}).copy()
    polymer_off_query['disable_polymer'] = 'true'
    kwargs['query'] = polymer_off_query

    parent = super(YoutubeBaseInfoExtractor, self)
    return parent._download_webpage_handle(*args, **compat_kwargs(kwargs))
# Initialisation hook of the extractor: triggers the login attempt via _login().
274 def _real_initialize(self):
# Case: no downloader is attached to this extractor.
275 if self._downloader is None:
# _login() is documented to return False when the login failed
# (True when it succeeded or was skipped).
278 if not self._login():
282 class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
283 # Extract entries from page with "Load more" button
# page        - HTML of the first page
# playlist_id - id handed to the download call for the follow-up pages
# Entries come from self._process_page(), which this class does not define itself.
284 def _entries(self, page, playlist_id):
# The first page is both the content to process and the place to look for the load-more link.
285 more_widget_html = content_html = page
# page_num starts at 1 and is only used in the progress message below.
286 for page_num in itertools.count(1):
287 for entry in self._process_page(content_html):
# The relative URL of the next chunk is the data-uix-load-more-href attribute of the widget.
290 mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
296 while count <= retries:
298 # Downloading page may result in intermittent 5xx HTTP error
299 # that is usually worked around with a retry
# The ' (retry #N)' suffix is added to the progress message only when count is non-zero.
300 more = self._download_json(
301 'https://youtube.com/%s' % mobj.group('more'), playlist_id,
302 'Downloading page #%s%s'
303 % (page_num, ' (retry #%d)' % count if count else ''),
304 transform_source=uppercase_escape)
306 except ExtractorError as e:
# Only HTTP 500 and 503 errors are singled out here.
307 if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503):
# The JSON reply carries the next batch of entries ('content_html')
# and the next load-more widget ('load_more_widget_html').
313 content_html = more['content_html']
314 if not content_html.strip():
315 # Some webpages show a "Load more" button but they don't
318 more_widget_html = more['load_more_widget_html']
321 class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
def _process_page(self, content):
    """Yield a ``'Youtube'`` url_result for each video found in *content*.

    The (video id, title) pairs come from ``extract_videos_from_page``; the id
    is used both as the URL argument and as the video id of the result.
    """
    id_title_pairs = self.extract_videos_from_page(content)
    for vid, title in id_title_pairs:
        entry = self.url_result(vid, 'Youtube', vid, title)
        yield entry
# Scans page with video_re and records what it finds in the two parallel lists
# ids_in_page / titles_in_page (same position = same video). Both lists are
# modified in place.
326 def extract_videos_from_page_impl(self, video_re, page, ids_in_page, titles_in_page):
327 for mobj in re.finditer(video_re, page):
328 # The link with index 0 is not the first video of the playlist (not sure if still actual)
# NOTE(review): the guard compares the 'id' group with '0' although the comment
# above talks about the index — confirm which group was intended.
329 if 'index' in mobj.groupdict() and mobj.group('id') == '0':
331 video_id = mobj.group('id')
# The 'title' group is optional in video_re; without it the title is None.
332 video_title = unescapeHTML(
333 mobj.group('title')) if 'title' in mobj.groupdict() else None
335 video_title = video_title.strip()
# presumably the caption of a playlist-wide link rather than a video title — confirm
336 if video_title == '► Play all':
# Position of an id that is already recorded; its title is only filled in
# when a title was found now and none was stored before.
339 idx = ids_in_page.index(video_id)
340 if video_title and not titles_in_page[idx]:
341 titles_in_page[idx] = video_title
# Appending to both lists together keeps them aligned
# (presumably the path for ids not seen before — confirm).
343 ids_in_page.append(video_id)
344 titles_in_page.append(video_title)
# Returns (video_id, title) pairs for page, collected by
# extract_videos_from_page_impl using self._VIDEO_RE.
# NOTE(review): _VIDEO_RE is not defined in the visible part of this class;
# presumably subclasses provide it — confirm.
346 def extract_videos_from_page(self, page):
349 self.extract_videos_from_page_impl(
350 self._VIDEO_RE, page, ids_in_page, titles_in_page)
# zip() pairs the two parallel lists position by position.
351 return zip(ids_in_page, titles_in_page)
354 class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
# Yields one 'YoutubePlaylist' url_result per playlist id linked from a
# heading whose class contains 'yt-lockup-title'.
355 def _process_page(self, content):
# orderedSet presumably removes duplicate ids while keeping first-seen order — confirm.
356 for playlist_id in orderedSet(re.findall(
357 r'<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*><a[^>]+href="/?playlist\?list=([0-9A-Za-z-_]{10,})"',
# Each id is turned back into a full playlist URL.
359 yield self.url_result(
360 'https://www.youtube.com/playlist?list=%s' % playlist_id, 'YoutubePlaylist')
def _real_extract(self, url):
    """Build a playlist result for the page at *url*.

    The id is taken from the URL via ``_match_id``, the page is downloaded
    once, and the entries are produced by ``_entries`` starting from that
    first page. The playlist title comes from the page's OpenGraph title.
    """
    list_id = self._match_id(url)
    first_page = self._download_webpage(url, list_id)
    # fatal=False: the title lookup is requested in non-fatal mode.
    list_title = self._og_search_title(first_page, fatal=False)
    entries = self._entries(first_page, list_id)
    return self.playlist_result(entries, list_id, list_title)
369 class YoutubeIE(YoutubeBaseInfoExtractor):
370 IE_DESC = 'YouTube.com'
371 _VALID_URL = r"""(?x)^
373 (?:https?://|//) # http(s):// or protocol-independent URL
374 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com/|
375 (?:www\.)?deturl\.com/www\.youtube\.com/|
376 (?:www\.)?pwnyoutube\.com/|
377 (?:www\.)?hooktube\.com/|
378 (?:www\.)?yourepeat\.com/|
379 tube\.majestyc\.net/|
380 # Invidious instances taken from https://github.com/omarroth/invidious/wiki/Invidious-Instances
381 (?:(?:www|dev)\.)?invidio\.us/|
382 (?:(?:www|no)\.)?invidiou\.sh/|
383 (?:(?:www|fi|de)\.)?invidious\.snopyta\.org/|
384 (?:www\.)?invidious\.kabi\.tk/|
385 (?:www\.)?invidious\.13ad\.de/|
386 (?:www\.)?invidious\.mastodon\.host/|
387 (?:www\.)?invidious\.nixnet\.xyz/|
388 (?:www\.)?invidious\.drycat\.fr/|
389 (?:www\.)?tube\.poal\.co/|
390 (?:www\.)?vid\.wxzm\.sx/|
391 (?:www\.)?yewtu\.be/|
392 (?:www\.)?yt\.elukerio\.org/|
393 (?:www\.)?yt\.lelux\.fi/|
394 (?:www\.)?invidious\.ggc-project\.de/|
395 (?:www\.)?yt\.maisputain\.ovh/|
396 (?:www\.)?invidious\.13ad\.de/|
397 (?:www\.)?invidious\.toot\.koeln/|
398 (?:www\.)?invidious\.fdn\.fr/|
399 (?:www\.)?watch\.nettohikari\.com/|
400 (?:www\.)?kgg2m7yk5aybusll\.onion/|
401 (?:www\.)?qklhadlycap4cnod\.onion/|
402 (?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion/|
403 (?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion/|
404 (?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion/|
405 (?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion/|
406 (?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p/|
407 (?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion/|
408 youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
409 (?:.*?\#/)? # handle anchor (#/) redirect urls
410 (?: # the various things that can precede the ID:
411 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
412 |(?: # or the v= param in all its forms
413 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
414 (?:\?|\#!?) # the params delimiter ? or # or #!
415 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&v=V36LpHqtcDY)
420 youtu\.be| # just youtu.be/xxxx
421 vid\.plus| # or vid.plus/xxxx
422 zwearz\.com/watch| # or zwearz.com/watch/xxxx
424 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
426 )? # all until now is optional -> you can pass the naked ID
427 ([0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
430 %(playlist_id)s| # combined list/video URLs are handled by the playlist IE
431 WL # WL are handled by the watch later IE
434 (?(1).+)? # if we found the ID, everything can follow
435 $""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
436 _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
438 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?/base\.(?P<ext>[a-z]+)$',
439 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.(?P<ext>[a-z]+)$',
442 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
443 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
444 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
445 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
446 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
447 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
448 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
449 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
450 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
451 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
452 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
453 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
454 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
455 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
456 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
457 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
458 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
459 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
463 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
464 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
465 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
466 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
467 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
468 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
469 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
471 # Apple HTTP Live Streaming
472 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
473 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
474 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
475 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
476 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
477 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
478 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
479 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
482 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
483 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
484 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
485 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
486 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
487 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
488 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
489 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
490 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
491 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
492 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
493 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
496 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
497 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
498 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
499 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
500 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
501 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
502 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
505 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
506 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
507 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
508 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
509 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
510 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
511 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
512 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
513 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
514 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
515 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
516 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
517 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
518 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
519 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
520 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
521 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
522 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
523 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
524 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
525 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
526 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
529 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
530 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
532 # Dash webm audio with opus inside
533 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
534 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
535 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
538 '_rtmp': {'protocol': 'rtmp'},
540 # av01 video only formats sometimes served with "unknown" codecs
541 '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
542 '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
543 '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
544 '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
546 _SUBTITLE_FORMATS = ('srv1', 'srv2', 'srv3', 'ttml', 'vtt')
553 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
557 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
558 'uploader': 'Philipp Hagemeister',
559 'uploader_id': 'phihag',
560 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
561 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
562 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
563 'upload_date': '20121002',
564 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
565 'categories': ['Science & Technology'],
566 'tags': ['youtube-dl'],
570 'dislike_count': int,
576 'url': 'https://www.youtube.com/watch?v=UxxajLWwzqY',
577 'note': 'Test generic use_cipher_signature video (#897)',
581 'upload_date': '20120506',
582 'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
583 'alt_title': 'I Love It (feat. Charli XCX)',
584 'description': 'md5:19a2f98d9032b9311e686ed039564f63',
585 'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
586 'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
587 'iconic ep', 'iconic', 'love', 'it'],
589 'uploader': 'Icona Pop',
590 'uploader_id': 'IconaPop',
591 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IconaPop',
592 'creator': 'Icona Pop',
593 'track': 'I Love It (feat. Charli XCX)',
594 'artist': 'Icona Pop',
598 'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ',
599 'note': 'Test VEVO video with age protection (#956)',
603 'upload_date': '20130703',
604 'title': 'Justin Timberlake - Tunnel Vision (Official Music Video) (Explicit)',
605 'alt_title': 'Tunnel Vision',
606 'description': 'md5:07dab3356cde4199048e4c7cd93471e1',
608 'uploader': 'justintimberlakeVEVO',
609 'uploader_id': 'justintimberlakeVEVO',
610 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/justintimberlakeVEVO',
611 'creator': 'Justin Timberlake',
612 'track': 'Tunnel Vision',
613 'artist': 'Justin Timberlake',
618 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
619 'note': 'Embed-only video (#1746)',
623 'upload_date': '20120608',
624 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
625 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
626 'uploader': 'SET India',
627 'uploader_id': 'setindia',
628 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
633 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=UxxajLWwzqY',
634 'note': 'Use the first video ID in the URL',
638 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
639 'uploader': 'Philipp Hagemeister',
640 'uploader_id': 'phihag',
641 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
642 'upload_date': '20121002',
643 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
644 'categories': ['Science & Technology'],
645 'tags': ['youtube-dl'],
649 'dislike_count': int,
652 'skip_download': True,
656 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
657 'note': '256k DASH audio (format 141) via DASH manifest',
661 'upload_date': '20121002',
662 'uploader_id': '8KVIDEO',
663 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
665 'uploader': '8KVIDEO',
666 'title': 'UHDTV TEST 8K VIDEO.mp4'
669 'youtube_include_dash_manifest': True,
672 'skip': 'format 141 not served anymore',
674 # DASH manifest with encrypted signature
676 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
680 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
681 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
683 'uploader': 'AfrojackVEVO',
684 'uploader_id': 'AfrojackVEVO',
685 'upload_date': '20131011',
688 'youtube_include_dash_manifest': True,
689 'format': '141/bestaudio[ext=m4a]',
692 # JS player signature function name containing $
694 'url': 'https://www.youtube.com/watch?v=nfWlot6h_JM',
698 'title': 'Taylor Swift - Shake It Off',
699 'description': 'md5:307195cd21ff7fa352270fe884570ef0',
701 'uploader': 'TaylorSwiftVEVO',
702 'uploader_id': 'TaylorSwiftVEVO',
703 'upload_date': '20140818',
706 'youtube_include_dash_manifest': True,
707 'format': '141/bestaudio[ext=m4a]',
712 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
717 'upload_date': '20100909',
718 'uploader': 'Amazing Atheist',
719 'uploader_id': 'TheAmazingAtheist',
720 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
721 'title': 'Burning Everyone\'s Koran',
722 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
725 # Normal age-gate video (No vevo, embed allowed)
727 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
731 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
732 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
734 'uploader': 'The Witcher',
735 'uploader_id': 'WitcherGame',
736 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
737 'upload_date': '20140605',
741 # Age-gate video with encrypted signature
743 'url': 'https://www.youtube.com/watch?v=6kLq3WMV1nU',
747 'title': 'Dedication To My Ex (Miss That) (Lyric Video)',
748 'description': 'md5:33765bb339e1b47e7e72b5490139bb41',
750 'uploader': 'LloydVEVO',
751 'uploader_id': 'LloydVEVO',
752 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/LloydVEVO',
753 'upload_date': '20110629',
757 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
758 # YouTube Red ad is not captured for creator
760 'url': '__2ABJjxzNo',
765 'upload_date': '20100430',
766 'uploader_id': 'deadmau5',
767 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
768 'creator': 'Dada Life, deadmau5',
769 'description': 'md5:12c56784b8032162bb936a5f76d55360',
770 'uploader': 'deadmau5',
771 'title': 'Deadmau5 - Some Chords (HD)',
772 'alt_title': 'This Machine Kills Some Chords',
774 'expected_warnings': [
775 'DASH manifest missing',
778 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
780 'url': 'lqQg6PlCWgI',
785 'upload_date': '20150827',
786 'uploader_id': 'olympic',
787 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
788 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
789 'uploader': 'Olympic',
790 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
793 'skip_download': 'requires avconv',
798 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
802 'stretched_ratio': 16 / 9.,
804 'upload_date': '20110310',
805 'uploader_id': 'AllenMeow',
806 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
807 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
809 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
812 # url_encoded_fmt_stream_map is empty string
814 'url': 'qEJwOuvDf7I',
818 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
820 'upload_date': '20150404',
821 'uploader_id': 'spbelect',
822 'uploader': 'Наблюдатели Петербурга',
825 'skip_download': 'requires avconv',
827 'skip': 'This live event has ended.',
829 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
831 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
835 'title': 'md5:7b81415841e02ecd4313668cde88737a',
836 'description': 'md5:116377fd2963b81ec4ce64b542173306',
838 'upload_date': '20150625',
839 'uploader_id': 'dorappi2000',
840 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
841 'uploader': 'dorappi2000',
842 'formats': 'mincount:31',
844 'skip': 'not actual anymore',
846 # DASH manifest with segment_list
848 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
849 'md5': '8ce563a1d667b599d21064e982ab9e31',
853 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
854 'uploader': 'Airtek',
855 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
856 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
857 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
860 'youtube_include_dash_manifest': True,
861 'format': '135', # bestvideo
863 'skip': 'This live event has ended.',
866 # Multifeed videos (multiple cameras), URL is for Main Camera
867 'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
870 'title': 'teamPGP: Rocket League Noob Stream',
871 'description': 'md5:dc7872fb300e143831327f1bae3af010',
877 'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
878 'description': 'md5:dc7872fb300e143831327f1bae3af010',
880 'upload_date': '20150721',
881 'uploader': 'Beer Games Beer',
882 'uploader_id': 'beergamesbeer',
883 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
884 'license': 'Standard YouTube License',
890 'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
891 'description': 'md5:dc7872fb300e143831327f1bae3af010',
893 'upload_date': '20150721',
894 'uploader': 'Beer Games Beer',
895 'uploader_id': 'beergamesbeer',
896 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
897 'license': 'Standard YouTube License',
903 'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
904 'description': 'md5:dc7872fb300e143831327f1bae3af010',
906 'upload_date': '20150721',
907 'uploader': 'Beer Games Beer',
908 'uploader_id': 'beergamesbeer',
909 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
910 'license': 'Standard YouTube License',
916 'title': 'teamPGP: Rocket League Noob Stream (zim)',
917 'description': 'md5:dc7872fb300e143831327f1bae3af010',
919 'upload_date': '20150721',
920 'uploader': 'Beer Games Beer',
921 'uploader_id': 'beergamesbeer',
922 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
923 'license': 'Standard YouTube License',
927 'skip_download': True,
929 'skip': 'This video is not available.',
932 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
933 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
936 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
939 'skip': 'Not multifeed anymore',
942 'url': 'https://vid.plus/FlRa-iH7PGw',
943 'only_matching': True,
946 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
947 'only_matching': True,
950 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
951 # Also tests cut-off URL expansion in video description (see
952 # https://github.com/ytdl-org/youtube-dl/issues/1892,
953 # https://github.com/ytdl-org/youtube-dl/issues/8164)
954 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
958 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
959 'alt_title': 'Dark Walk - Position Music',
960 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
962 'upload_date': '20151119',
963 'uploader_id': 'IronSoulElf',
964 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
965 'uploader': 'IronSoulElf',
966 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
967 'track': 'Dark Walk - Position Music',
968 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
969 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
972 'skip_download': True,
976 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
977 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
978 'only_matching': True,
981 # Video with yt:stretch=17:0
982 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
986 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
987 'description': 'md5:ee18a25c350637c8faff806845bddee9',
988 'upload_date': '20151107',
989 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
990 'uploader': 'CH GAMER DROID',
993 'skip_download': True,
995 'skip': 'This video does not exist.',
998 # Video licensed under Creative Commons
999 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1001 'id': 'M4gD1WSo5mA',
1003 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1004 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
1006 'upload_date': '20150127',
1007 'uploader_id': 'BerkmanCenter',
1008 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
1009 'uploader': 'The Berkman Klein Center for Internet & Society',
1010 'license': 'Creative Commons Attribution license (reuse allowed)',
1013 'skip_download': True,
1017 # Channel-like uploader_url
1018 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1020 'id': 'eQcmzGIKrzg',
1022 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
1023 'description': 'md5:dda0d780d5a6e120758d1711d062a867',
1025 'upload_date': '20151119',
1026 'uploader': 'Bernie Sanders',
1027 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1028 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
1029 'license': 'Creative Commons Attribution license (reuse allowed)',
1032 'skip_download': True,
1036 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;v=V36LpHqtcDY',
1037 'only_matching': True,
1040 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
1041 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1042 'only_matching': True,
1045 # Rental video preview
1046 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1048 'id': 'uGpuVWrhIzE',
1050 'title': 'Piku - Trailer',
1051 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1052 'upload_date': '20150811',
1053 'uploader': 'FlixMatrix',
1054 'uploader_id': 'FlixMatrixKaravan',
1055 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
1056 'license': 'Standard YouTube License',
1059 'skip_download': True,
1061 'skip': 'This video is not available.',
1064 # YouTube Red video with episode data
1065 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1067 'id': 'iqKdEhx-dD4',
1069 'title': 'Isolation - Mind Field (Ep 1)',
1070 'description': 'md5:46a29be4ceffa65b92d277b93f463c0f',
1072 'upload_date': '20170118',
1073 'uploader': 'Vsauce',
1074 'uploader_id': 'Vsauce',
1075 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
1076 'series': 'Mind Field',
1078 'episode_number': 1,
1081 'skip_download': True,
1083 'expected_warnings': [
1084 'Skipping DASH manifest',
1088 # The following content has been identified by the YouTube community
1089 # as inappropriate or offensive to some audiences.
1090 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1092 'id': '6SJNVb0GnPI',
1094 'title': 'Race Differences in Intelligence',
1095 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1097 'upload_date': '20140124',
1098 'uploader': 'New Century Foundation',
1099 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1100 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1103 'skip_download': True,
1108 'url': '1t24XAntNCY',
1109 'only_matching': True,
1112 # geo restricted to JP
1113 'url': 'sJL6WA-aGkQ',
1114 'only_matching': True,
1117 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
1118 'only_matching': True,
1121 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1122 'only_matching': True,
1126 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1127 'only_matching': True,
1130 # Video with unsupported adaptive stream type formats
1131 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1133 'id': 'Z4Vy8R84T1U',
1135 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1136 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1138 'upload_date': '20130923',
1139 'uploader': 'Amelia Putri Harwita',
1140 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1141 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1142 'formats': 'maxcount:10',
1145 'skip_download': True,
1146 'youtube_include_dash_manifest': False,
1148 'skip': 'not actual anymore',
1151 # Youtube Music Auto-generated description
1152 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1154 'id': 'MgNrAu2pzNs',
1156 'title': 'Voyeur Girl',
1157 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1158 'upload_date': '20190312',
1159 'uploader': 'Stephen - Topic',
1160 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1161 'artist': 'Stephen',
1162 'track': 'Voyeur Girl',
1163 'album': 'it\'s too much love to know my dear',
1164 'release_date': '20190313',
1165 'release_year': 2019,
1168 'skip_download': True,
1172 # Youtube Music Auto-generated description
1173 # Retrieve 'artist' field from 'Artist:' in video description
1174 # when it is present on youtube music video
1175 'url': 'https://www.youtube.com/watch?v=k0jLE7tTwjY',
1177 'id': 'k0jLE7tTwjY',
1179 'title': 'Latch Feat. Sam Smith',
1180 'description': 'md5:3cb1e8101a7c85fcba9b4fb41b951335',
1181 'upload_date': '20150110',
1182 'uploader': 'Various Artists - Topic',
1183 'uploader_id': 'UCNkEcmYdjrH4RqtNgh7BZ9w',
1184 'artist': 'Disclosure',
1185 'track': 'Latch Feat. Sam Smith',
1186 'album': 'Latch Featuring Sam Smith',
1187 'release_date': '20121008',
1188 'release_year': 2012,
1191 'skip_download': True,
1195 # Youtube Music Auto-generated description
1196 # handle multiple artists on youtube music video
1197 'url': 'https://www.youtube.com/watch?v=74qn0eJSjpA',
1199 'id': '74qn0eJSjpA',
1201 'title': 'Eastside',
1202 'description': 'md5:290516bb73dcbfab0dcc4efe6c3de5f2',
1203 'upload_date': '20180710',
1204 'uploader': 'Benny Blanco - Topic',
1205 'uploader_id': 'UCzqz_ksRu_WkIzmivMdIS7A',
1206 'artist': 'benny blanco, Halsey, Khalid',
1207 'track': 'Eastside',
1208 'album': 'Eastside',
1209 'release_date': '20180713',
1210 'release_year': 2018,
1213 'skip_download': True,
1217 # Youtube Music Auto-generated description
1218 # handle youtube music video with release_year and no release_date
1219 'url': 'https://www.youtube.com/watch?v=-hcAI0g-f5M',
1221 'id': '-hcAI0g-f5M',
1223 'title': 'Put It On Me',
1224 'description': 'md5:f6422397c07c4c907c6638e1fee380a5',
1225 'upload_date': '20180426',
1226 'uploader': 'Matt Maeson - Topic',
1227 'uploader_id': 'UCnEkIGqtGcQMLk73Kp-Q5LQ',
1228 'artist': 'Matt Maeson',
1229 'track': 'Put It On Me',
1230 'album': 'The Hearse',
1231 'release_date': None,
1232 'release_year': 2018,
1235 'skip_download': True,
1239 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1240 'only_matching': True,
1243 # invalid -> valid video id redirection
1244 'url': 'DJztXj2GPfl',
1246 'id': 'DJztXj2GPfk',
1248 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1249 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1250 'upload_date': '20090125',
1251 'uploader': 'Prochorowka',
1252 'uploader_id': 'Prochorowka',
1253 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1254 'artist': 'Panjabi MC',
1255 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1256 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1259 'skip_download': True,
def __init__(self, *args, **kwargs):
    """Initialise the extractor with an empty signature-function cache."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # Maps (player_url, signature cache id) -> deciphering callable; it is
    # populated lazily by _decrypt_signature.
    self._player_cache = dict()
def report_video_info_webpage_download(self, video_id):
    """Tell the user that the video info webpage is being downloaded."""
    message = '%s: Downloading video info webpage' % video_id
    self.to_screen(message)
def report_information_extraction(self, video_id):
    """Tell the user that video information is being extracted."""
    message = '%s: Extracting video information' % video_id
    self.to_screen(message)
def report_unavailable_format(self, video_id, format):
    """Tell the user that the given format is not available for video_id."""
    message = '%s: Format %s not available' % (video_id, format)
    self.to_screen(message)
def report_rtmp_download(self):
    """Tell the user that the download will use the RTMP protocol."""
    message = 'RTMP download detected'
    self.to_screen(message)
def _signature_cache_id(self, example_sig):
    """Return the dot-joined lengths of the dot-separated parts of a signature.

    For instance a signature made of a 3-character part and a 2-character
    part yields '3.2'.
    """
    part_lengths = [compat_str(len(part)) for part in example_sig.split('.')]
    return '.'.join(part_lengths)
1289 def _extract_player_info(cls, player_url):
1290 for player_re in cls._PLAYER_INFO_RE:
1291 id_m = re.search(player_re, player_url)
1295 raise ExtractorError('Cannot identify player %r' % player_url)
1296 return id_m.group('ext'), id_m.group('id')
1298 def _extract_signature_function(self, video_id, player_url, example_sig):
1299 player_type, player_id = self._extract_player_info(player_url)
1301 # Read from filesystem cache
1302 func_id = '%s_%s_%s' % (
1303 player_type, player_id, self._signature_cache_id(example_sig))
1304 assert os.path.basename(func_id) == func_id
1306 cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
1307 if cache_spec is not None:
1308 return lambda s: ''.join(s[i] for i in cache_spec)
1311 'Downloading player %s' % player_url
1312 if self._downloader.params.get('verbose') else
1313 'Downloading %s player %s' % (player_type, player_id)
1315 if player_type == 'js':
1316 code = self._download_webpage(
1317 player_url, video_id,
1319 errnote='Download of %s failed' % player_url)
1320 res = self._parse_sig_js(code)
1321 elif player_type == 'swf':
1322 urlh = self._request_webpage(
1323 player_url, video_id,
1325 errnote='Download of %s failed' % player_url)
1327 res = self._parse_sig_swf(code)
1329 assert False, 'Invalid player type %r' % player_type
1331 test_string = ''.join(map(compat_chr, range(len(example_sig))))
1332 cache_res = res(test_string)
1333 cache_spec = [ord(c) for c in cache_res]
1335 self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
def _print_sig_code(self, func, example_sig):
    """Print Python source code equivalent to the signature function func.

    func is probed with a string whose k-th character has code point k, so
    the output reveals which input index ends up at each output position.
    Runs of indices with step +1 or -1 are rendered as slices, everything
    else as single subscripts, and the resulting expression is written to
    the screen.
    """
    def gen_sig_code(idxs):
        def _genslice(start, end, step):
            starts = '' if start == 0 else str(start)
            ends = (':%d' % (end + step)) if end + step >= 0 else ':'
            steps = '' if step == 1 else (':%d' % step)
            return 's[%s%s%s]' % (starts, ends, steps)

        if not idxs:
            # Nothing to describe
            return

        step = None
        # Quelch pyflakes warnings - start will be set when step is set
        start = '(Never used)'
        # Pre-seed the loop variable: with a single index the loop body never
        # runs, and the trailing yield below would otherwise hit an unbound
        # name.
        i = idxs[0]
        for i, prev in zip(idxs[1:], idxs[:-1]):
            if step is not None:
                if i - prev == step:
                    continue
                yield _genslice(start, prev, step)
                step = None
                continue
            if i - prev in [-1, 1]:
                step = i - prev
                start = prev
                continue
            else:
                yield 's[%d]' % prev
        if step is None:
            yield 's[%d]' % i
        else:
            yield _genslice(start, i, step)

    test_string = ''.join(map(compat_chr, range(len(example_sig))))
    cache_res = func(test_string)
    cache_spec = [ord(c) for c in cache_res]
    expr_code = ' + '.join(gen_sig_code(cache_spec))
    signature_id_tuple = '(%s)' % (
        ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            ' return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)
def _parse_sig_js(self, jscode):
    """Return a callable that deciphers a signature using the JS player code.

    The name of the deciphering function is located in jscode with the
    regexes below, then the function is extracted with JSInterpreter. The
    returned callable takes the encrypted signature string and returns
    whatever the interpreted JS function returns for it.
    """
    sig_function_patterns = (
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        # Obsolete patterns
        r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        # This pattern used to be listed twice in a row; trying an identical
        # regex a second time cannot change the outcome, so it appears once.
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
    )
    funcname = self._search_regex(
        sig_function_patterns, jscode,
        'Initial JS player signature function name', group='sig')

    jsi = JSInterpreter(jscode)
    initial_function = jsi.extract_function(funcname)

    def decipher(s):
        # The extracted function receives its arguments as a list
        return initial_function([s])

    return decipher
def _parse_sig_swf(self, file_contents):
    """Return a callable that deciphers a signature using the SWF player.

    The 'decipher' function of the 'SignatureDecipher' class is extracted
    from the SWF file contents with SWFInterpreter.
    """
    TARGET_CLASSNAME = 'SignatureDecipher'
    interpreter = SWFInterpreter(file_contents)
    decipher_class = interpreter.extract_class(TARGET_CLASSNAME)
    initial_function = interpreter.extract_function(decipher_class, 'decipher')

    def decipher(s):
        # The extracted function receives its arguments as a list
        return initial_function([s])

    return decipher
1405 def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
1406 """Turn the encrypted s field into a working signature"""
1408 if player_url is None:
1409 raise ExtractorError('Cannot decrypt signature without player_url')
1411 if player_url.startswith('//'):
1412 player_url = 'https:' + player_url
1413 elif not re.match(r'https?://', player_url):
1414 player_url = compat_urlparse.urljoin(
1415 'https://www.youtube.com', player_url)
1417 player_id = (player_url, self._signature_cache_id(s))
1418 if player_id not in self._player_cache:
1419 func = self._extract_signature_function(
1420 video_id, player_url, s
1422 self._player_cache[player_id] = func
1423 func = self._player_cache[player_id]
1424 if self._downloader.params.get('youtube_print_sig_code'):
1425 self._print_sig_code(func, s)
1427 except Exception as e:
1428 tb = traceback.format_exc()
1429 raise ExtractorError(
1430 'Signature extraction failed: ' + tb, cause=e)
1432 def _get_subtitles(self, video_id, webpage):
1434 subs_doc = self._download_xml(
1435 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
1436 video_id, note=False)
1437 except ExtractorError as err:
1438 self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err))
1442 for track in subs_doc.findall('track'):
1443 lang = track.attrib['lang_code']
1444 if lang in sub_lang_list:
1447 for ext in self._SUBTITLE_FORMATS:
1448 params = compat_urllib_parse_urlencode({
1452 'name': track.attrib['name'].encode('utf-8'),
1454 sub_formats.append({
1455 'url': 'https://www.youtube.com/api/timedtext?' + params,
1458 sub_lang_list[lang] = sub_formats
1459 if not sub_lang_list:
1460 self._downloader.report_warning('video doesn\'t have subtitles')
1462 return sub_lang_list
1464 def _get_ytplayer_config(self, video_id, webpage):
1466 # User data may contain arbitrary character sequences that may affect
1467 # JSON extraction with regex, e.g. when '};' is contained the second
1468 # regex won't capture the whole JSON. Yet working around by trying more
1469 # concrete regex first keeping in mind proper quoted string handling
1470 # to be implemented in future that will replace this workaround (see
1471 # https://github.com/ytdl-org/youtube-dl/issues/7468,
1472 # https://github.com/ytdl-org/youtube-dl/pull/7599)
1473 r';ytplayer\.config\s*=\s*({.+?});ytplayer',
1474 r';ytplayer\.config\s*=\s*({.+?});',
1476 config = self._search_regex(
1477 patterns, webpage, 'ytplayer.config', default=None)
1479 return self._parse_json(
1480 uppercase_escape(config), video_id, fatal=False)
1482 def _get_automatic_captions(self, video_id, webpage):
1483 """We need the webpage for getting the captions url, pass it as an
1484 argument to speed up the process."""
1485 self.to_screen('%s: Looking for automatic captions' % video_id)
1486 player_config = self._get_ytplayer_config(video_id, webpage)
1487 err_msg = 'Couldn\'t find automatic captions for %s' % video_id
1488 if not player_config:
1489 self._downloader.report_warning(err_msg)
1492 args = player_config['args']
1493 caption_url = args.get('ttsurl')
1495 timestamp = args['timestamp']
1496 # We get the available subtitles
1497 list_params = compat_urllib_parse_urlencode({
1502 list_url = caption_url + '&' + list_params
1503 caption_list = self._download_xml(list_url, video_id)
1504 original_lang_node = caption_list.find('track')
1505 if original_lang_node is None:
1506 self._downloader.report_warning('Video doesn\'t have automatic captions')
1508 original_lang = original_lang_node.attrib['lang_code']
1509 caption_kind = original_lang_node.attrib.get('kind', '')
1512 for lang_node in caption_list.findall('target'):
1513 sub_lang = lang_node.attrib['lang_code']
1515 for ext in self._SUBTITLE_FORMATS:
1516 params = compat_urllib_parse_urlencode({
1517 'lang': original_lang,
1521 'kind': caption_kind,
1523 sub_formats.append({
1524 'url': caption_url + '&' + params,
1527 sub_lang_list[sub_lang] = sub_formats
1528 return sub_lang_list
1530 def make_captions(sub_url, sub_langs):
1531 parsed_sub_url = compat_urllib_parse_urlparse(sub_url)
1532 caption_qs = compat_parse_qs(parsed_sub_url.query)
1534 for sub_lang in sub_langs:
1536 for ext in self._SUBTITLE_FORMATS:
1538 'tlang': [sub_lang],
1541 sub_url = compat_urlparse.urlunparse(parsed_sub_url._replace(
1542 query=compat_urllib_parse_urlencode(caption_qs, True)))
1543 sub_formats.append({
1547 captions[sub_lang] = sub_formats
1550 # New captions format as of 22.06.2017
1551 player_response = args.get('player_response')
1552 if player_response and isinstance(player_response, compat_str):
1553 player_response = self._parse_json(
1554 player_response, video_id, fatal=False)
1556 renderer = player_response['captions']['playerCaptionsTracklistRenderer']
1557 base_url = renderer['captionTracks'][0]['baseUrl']
1559 for lang in renderer['translationLanguages']:
1560 lang_code = lang.get('languageCode')
1562 sub_lang_list.append(lang_code)
1563 return make_captions(base_url, sub_lang_list)
1565 # Some videos don't provide ttsurl but rather caption_tracks and
1566 # caption_translation_languages (e.g. 20LmZk1hakA)
1567 # Does not used anymore as of 22.06.2017
1568 caption_tracks = args['caption_tracks']
1569 caption_translation_languages = args['caption_translation_languages']
1570 caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
1572 for lang in caption_translation_languages.split(','):
1573 lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang))
1574 sub_lang = lang_qs.get('lc', [None])[0]
1576 sub_lang_list.append(sub_lang)
1577 return make_captions(caption_url, sub_lang_list)
1578 # An extractor error can be raise by the download process if there are
1579 # no automatic captions but there are subtitles
1580 except (KeyError, IndexError, ExtractorError):
1581 self._downloader.report_warning(err_msg)
def _mark_watched(self, video_id, video_info, player_response):
    """Request the playback tracking URL so the video is marked as watched.

    The URL is taken from player_response (playbackTracking) or, failing
    that, from video_info. Nothing happens when neither provides a valid
    URL. The request is non-fatal.
    """
    playback_url = url_or_none(try_get(
        player_response,
        lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']) or try_get(
        video_info, lambda x: x['videostats_playback_base_url'][0]))
    if not playback_url:
        return
    parsed_playback_url = compat_urlparse.urlparse(playback_url)
    qs = compat_urlparse.parse_qs(parsed_playback_url.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    # Pick 16 characters uniformly. The previous randint(0, 256) & 63 drew
    # from 257 values because randint includes both endpoints, which made
    # index 0 slightly more likely than the others.
    cpn = ''.join(random.choice(CPN_ALPHABET) for _ in range(16))

    qs.update({
        'ver': ['2'],
        'cpn': [cpn],
    })
    playback_url = compat_urlparse.urlunparse(
        parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))

    self._download_webpage(
        playback_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)
def _extract_urls(webpage):
    """Return a list of YouTube embeds found in webpage.

    Entries are HTML-unescaped player URLs for regular embeds, and the raw
    attribute values for lazyYT and "YouTube Video Importer" embeds.
    """
    entries = []

    # Embedded YouTube player
    for mobj in re.finditer(r'''(?x)
            (?:
                <iframe[^>]+?src=|
                data-video-url=|
                <embed[^>]+?src=|
                embedSWF\(?:\s*|
                <object[^>]+data=|
                new\s+SWFObject\(
            )
            (["\'])
                (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
                (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
            \1''', webpage):
        entries.append(unescapeHTML(mobj.group('url')))

    # lazyYT YouTube embed
    lazy_ids = re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
    entries.extend(list(map(unescapeHTML, lazy_ids)))

    # Wordpress "YouTube Video Importer" plugin
    matches = re.findall(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
    # findall yields one tuple per match; the last group is the video id
    for groups in matches:
        entries.append(groups[-1])

    return entries
def _extract_url(webpage):
    """Return the first entry found by YoutubeIE._extract_urls, or None."""
    found = YoutubeIE._extract_urls(webpage)
    if not found:
        return None
    return found[0]
def extract_id(cls, url):
    """Return the video id, i.e. the second group of cls._VALID_URL.

    The pattern is matched in verbose mode. Raises ExtractorError when url
    does not match.
    """
    match = re.match(cls._VALID_URL, url, re.VERBOSE)
    if match is not None:
        return match.group(2)
    raise ExtractorError('Invalid URL: %s' % url)
1656 def _extract_chapters(description, duration):
1659 chapter_lines = re.findall(
1660 r'(?:^|<br\s*/>)([^<]*<a[^>]+onclick=["\']yt\.www\.watch\.player\.seekTo[^>]+>(\d{1,2}:\d{1,2}(?::\d{1,2})?)</a>[^>]*)(?=$|<br\s*/>)',
1662 if not chapter_lines:
1665 for next_num, (chapter_line, time_point) in enumerate(
1666 chapter_lines, start=1):
1667 start_time = parse_duration(time_point)
1668 if start_time is None:
1670 if start_time > duration:
1672 end_time = (duration if next_num == len(chapter_lines)
1673 else parse_duration(chapter_lines[next_num][1]))
1674 if end_time is None:
1676 if end_time > duration:
1678 if start_time > end_time:
1680 chapter_title = re.sub(
1681 r'<a[^>]+>[^<]+</a>', '', chapter_line).strip(' \t-')
1682 chapter_title = re.sub(r'\s+', ' ', chapter_title)
1684 'start_time': start_time,
1685 'end_time': end_time,
1686 'title': chapter_title,
1690 def _real_extract(self, url):
1691 url, smuggled_data = unsmuggle_url(url, {})
1694 'http' if self._downloader.params.get('prefer_insecure', False)
1699 parsed_url = compat_urllib_parse_urlparse(url)
1700 for component in [parsed_url.fragment, parsed_url.query]:
1701 query = compat_parse_qs(component)
1702 if start_time is None and 't' in query:
1703 start_time = parse_duration(query['t'][0])
1704 if start_time is None and 'start' in query:
1705 start_time = parse_duration(query['start'][0])
1706 if end_time is None and 'end' in query:
1707 end_time = parse_duration(query['end'][0])
1709 # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1710 mobj = re.search(self._NEXT_URL_RE, url)
1712 url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
1713 video_id = self.extract_id(url)
1716 url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
1717 video_webpage, urlh = self._download_webpage_handle(url, video_id)
1719 qs = compat_parse_qs(compat_urllib_parse_urlparse(urlh.geturl()).query)
1720 video_id = qs.get('v', [None])[0] or video_id
1722 # Attempt to extract SWF player URL
1723 mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
1724 if mobj is not None:
1725 player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1731 def add_dash_mpd(video_info):
1732 dash_mpd = video_info.get('dashmpd')
1733 if dash_mpd and dash_mpd[0] not in dash_mpds:
1734 dash_mpds.append(dash_mpd[0])
1736 def add_dash_mpd_pr(pl_response):
1737 dash_mpd = url_or_none(try_get(
1738 pl_response, lambda x: x['streamingData']['dashManifestUrl'],
1740 if dash_mpd and dash_mpd not in dash_mpds:
1741 dash_mpds.append(dash_mpd)
1746 def extract_view_count(v_info):
1747 return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
1749 def extract_player_response(player_response, video_id):
1750 pl_response = str_or_none(player_response)
1753 pl_response = self._parse_json(pl_response, video_id, fatal=False)
1754 if isinstance(pl_response, dict):
1755 add_dash_mpd_pr(pl_response)
1758 player_response = {}
1762 embed_webpage = None
1763 if re.search(r'player-age-gate-content">', video_webpage) is not None:
1765 # We simulate the access to the video from www.youtube.com/v/{video_id}
1766 # this can be viewed without login into Youtube
1767 url = proto + '://www.youtube.com/embed/%s' % video_id
1768 embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
1769 data = compat_urllib_parse_urlencode({
1770 'video_id': video_id,
1771 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1772 'sts': self._search_regex(
1773 r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
1775 video_info_url = proto + '://www.youtube.com/get_video_info?' + data
1777 video_info_webpage = self._download_webpage(
1778 video_info_url, video_id,
1779 note='Refetching age-gated info webpage',
1780 errnote='unable to download video info webpage')
1781 except ExtractorError:
1782 video_info_webpage = None
1783 if video_info_webpage:
1784 video_info = compat_parse_qs(video_info_webpage)
1785 pl_response = video_info.get('player_response', [None])[0]
1786 player_response = extract_player_response(pl_response, video_id)
1787 add_dash_mpd(video_info)
1788 view_count = extract_view_count(video_info)
1791 # Try looking directly into the video webpage
1792 ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
1794 args = ytplayer_config['args']
1795 if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
1796 # Convert to the same format returned by compat_parse_qs
1797 video_info = dict((k, [v]) for k, v in args.items())
1798 add_dash_mpd(video_info)
1799 # Rental video is not rented but preview is available (e.g.
1800 # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
1801 # https://github.com/ytdl-org/youtube-dl/issues/10532)
1802 if not video_info and args.get('ypc_vid'):
1803 return self.url_result(
1804 args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
1805 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1807 if not player_response:
1808 player_response = extract_player_response(args.get('player_response'), video_id)
1809 if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
1810 add_dash_mpd_pr(player_response)
1812 def extract_unavailable_message():
1814 for tag, kind in (('h1', 'message'), ('div', 'submessage')):
1815 msg = self._html_search_regex(
1816 r'(?s)<{tag}[^>]+id=["\']unavailable-{kind}["\'][^>]*>(.+?)</{tag}>'.format(tag=tag, kind=kind),
1817 video_webpage, 'unavailable %s' % kind, default=None)
1819 messages.append(msg)
1821 return '\n'.join(messages)
1823 if not video_info and not player_response:
1824 unavailable_message = extract_unavailable_message()
1825 if not unavailable_message:
1826 unavailable_message = 'Unable to extract video data'
1827 raise ExtractorError(
1828 'YouTube said: %s' % unavailable_message, expected=True, video_id=video_id)
1830 if not isinstance(video_info, dict):
1833 video_details = try_get(
1834 player_response, lambda x: x['videoDetails'], dict) or {}
1836 video_title = video_info.get('title', [None])[0] or video_details.get('title')
1838 self._downloader.report_warning('Unable to extract video title')
1841 description_original = video_description = get_element_by_id("eow-description", video_webpage)
1842 if video_description:
1845 redir_url = compat_urlparse.urljoin(url, m.group(1))
1846 parsed_redir_url = compat_urllib_parse_urlparse(redir_url)
1847 if re.search(r'^(?:www\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)$', parsed_redir_url.netloc) and parsed_redir_url.path == '/redirect':
1848 qs = compat_parse_qs(parsed_redir_url.query)
1854 description_original = video_description = re.sub(r'''(?x)
1856 (?:[a-zA-Z-]+="[^"]*"\s+)*?
1857 (?:title|href)="([^"]+)"\s+
1858 (?:[a-zA-Z-]+="[^"]*"\s+)*?
1862 ''', replace_url, video_description)
1863 video_description = clean_html(video_description)
1865 video_description = self._html_search_meta('description', video_webpage) or video_details.get('shortDescription')
1867 if not smuggled_data.get('force_singlefeed', False):
1868 if not self._downloader.params.get('noplaylist'):
1869 multifeed_metadata_list = try_get(
1871 lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
1872 compat_str) or try_get(
1873 video_info, lambda x: x['multifeed_metadata_list'][0], compat_str)
1874 if multifeed_metadata_list:
1877 for feed in multifeed_metadata_list.split(','):
1878 # Unquote should take place before split on comma (,) since textual
1879 # fields may contain comma as well (see
1880 # https://github.com/ytdl-org/youtube-dl/issues/8536)
1881 feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
1883 def feed_entry(name):
1884 return try_get(feed_data, lambda x: x[name][0], compat_str)
1886 feed_id = feed_entry('id')
1889 feed_title = feed_entry('title')
1892 title += ' (%s)' % feed_title
1894 '_type': 'url_transparent',
1895 'ie_key': 'Youtube',
1897 '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
1898 {'force_singlefeed': True}),
1901 feed_ids.append(feed_id)
1903 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
1904 % (', '.join(feed_ids), video_id))
1905 return self.playlist_result(entries, video_id, video_title, video_description)
1907 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
1909 if view_count is None:
1910 view_count = extract_view_count(video_info)
1911 if view_count is None and video_details:
1912 view_count = int_or_none(video_details.get('viewCount'))
1915 is_live = bool_or_none(video_details.get('isLive'))
1917 # Check for "rental" videos
1918 if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
1919 raise ExtractorError('"rental" videos not supported. See https://github.com/ytdl-org/youtube-dl/issues/359 for more information.', expected=True)
1921 def _extract_filesize(media_url):
1922 return int_or_none(self._search_regex(
1923 r'\bclen[=/](\d+)', media_url, 'filesize', default=None))
1925 streaming_formats = try_get(player_response, lambda x: x['streamingData']['formats'], list) or []
1926 streaming_formats.extend(try_get(player_response, lambda x: x['streamingData']['adaptiveFormats'], list) or [])
1928 if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1929 self.report_rtmp_download()
1931 'format_id': '_rtmp',
1933 'url': video_info['conn'][0],
1934 'player_url': player_url,
1936 elif not is_live and (streaming_formats or len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1):
1937 encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
1938 if 'rtmpe%3Dyes' in encoded_url_map:
1939 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/ytdl-org/youtube-dl/issues/343 for more information.', expected=True)
1942 fmt_list = video_info.get('fmt_list', [''])[0]
1944 for fmt in fmt_list.split(','):
1945 spec = fmt.split('/')
1947 width_height = spec[1].split('x')
1948 if len(width_height) == 2:
1949 formats_spec[spec[0]] = {
1950 'resolution': spec[1],
1951 'width': int_or_none(width_height[0]),
1952 'height': int_or_none(width_height[1]),
1954 for fmt in streaming_formats:
1955 itag = str_or_none(fmt.get('itag'))
1958 quality = fmt.get('quality')
1959 quality_label = fmt.get('qualityLabel') or quality
1960 formats_spec[itag] = {
1961 'asr': int_or_none(fmt.get('audioSampleRate')),
1962 'filesize': int_or_none(fmt.get('contentLength')),
1963 'format_note': quality_label,
1964 'fps': int_or_none(fmt.get('fps')),
1965 'height': int_or_none(fmt.get('height')),
1966 # bitrate for itag 43 is always 2147483647
1967 'tbr': float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000) if itag != '43' else None,
1968 'width': int_or_none(fmt.get('width')),
1971 for fmt in streaming_formats:
1972 if fmt.get('drmFamilies') or fmt.get('drm_families'):
1974 url = url_or_none(fmt.get('url'))
1977 cipher = fmt.get('cipher') or fmt.get('signatureCipher')
1980 url_data = compat_parse_qs(cipher)
1981 url = url_or_none(try_get(url_data, lambda x: x['url'][0], compat_str))
1986 url_data = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
1988 stream_type = int_or_none(try_get(url_data, lambda x: x['stream_type'][0]))
1989 # Unsupported FORMAT_STREAM_TYPE_OTF
1990 if stream_type == 3:
1993 format_id = fmt.get('itag') or url_data['itag'][0]
1996 format_id = compat_str(format_id)
1999 if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
2000 ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
2001 jsplayer_url_json = self._search_regex(
2003 embed_webpage if age_gate else video_webpage,
2004 'JS player URL (1)', default=None)
2005 if not jsplayer_url_json and not age_gate:
2006 # We need the embed website after all
2007 if embed_webpage is None:
2008 embed_url = proto + '://www.youtube.com/embed/%s' % video_id
2009 embed_webpage = self._download_webpage(
2010 embed_url, video_id, 'Downloading embed webpage')
2011 jsplayer_url_json = self._search_regex(
2012 ASSETS_RE, embed_webpage, 'JS player URL')
2014 player_url = json.loads(jsplayer_url_json)
2015 if player_url is None:
2016 player_url_json = self._search_regex(
2017 r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
2018 video_webpage, 'age gate player URL')
2019 player_url = json.loads(player_url_json)
2021 if 'sig' in url_data:
2022 url += '&signature=' + url_data['sig'][0]
2023 elif 's' in url_data:
2024 encrypted_sig = url_data['s'][0]
2026 if self._downloader.params.get('verbose'):
2027 if player_url is None:
2028 player_desc = 'unknown'
2030 player_type, player_version = self._extract_player_info(player_url)
2031 player_desc = '%s player %s' % ('flash' if player_type == 'swf' else 'html5', player_version)
2032 parts_sizes = self._signature_cache_id(encrypted_sig)
2033 self.to_screen('{%s} signature length %s, %s' %
2034 (format_id, parts_sizes, player_desc))
2036 signature = self._decrypt_signature(
2037 encrypted_sig, video_id, player_url, age_gate)
2038 sp = try_get(url_data, lambda x: x['sp'][0], compat_str) or 'signature'
2039 url += '&%s=%s' % (sp, signature)
2040 if 'ratebypass' not in url:
2041 url += '&ratebypass=yes'
2044 'format_id': format_id,
2046 'player_url': player_url,
2048 if format_id in self._formats:
2049 dct.update(self._formats[format_id])
2050 if format_id in formats_spec:
2051 dct.update(formats_spec[format_id])
2053 # Some itags are not included in DASH manifest thus corresponding formats will
2054 # lack metadata (see https://github.com/ytdl-org/youtube-dl/pull/5993).
2055 # Trying to extract metadata from url_encoded_fmt_stream_map entry.
2056 mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
2057 width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
2060 width = int_or_none(fmt.get('width'))
2062 height = int_or_none(fmt.get('height'))
2064 filesize = int_or_none(url_data.get(
2065 'clen', [None])[0]) or _extract_filesize(url)
2067 quality = url_data.get('quality', [None])[0] or fmt.get('quality')
2068 quality_label = url_data.get('quality_label', [None])[0] or fmt.get('qualityLabel')
2070 tbr = (float_or_none(url_data.get('bitrate', [None])[0], 1000)
2071 or float_or_none(fmt.get('bitrate'), 1000)) if format_id != '43' else None
2072 fps = int_or_none(url_data.get('fps', [None])[0]) or int_or_none(fmt.get('fps'))
2075 'filesize': filesize,
2080 'format_note': quality_label or quality,
2082 for key, value in more_fields.items():
2085 type_ = url_data.get('type', [None])[0] or fmt.get('mimeType')
2087 type_split = type_.split(';')
2088 kind_ext = type_split[0].split('/')
2089 if len(kind_ext) == 2:
2091 dct['ext'] = mimetype2ext(type_split[0])
2092 if kind in ('audio', 'video'):
2094 for mobj in re.finditer(
2095 r'(?P<key>[a-zA-Z_-]+)=(?P<quote>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):
2096 if mobj.group('key') == 'codecs':
2097 codecs = mobj.group('val')
2100 dct.update(parse_codecs(codecs))
2101 if dct.get('acodec') == 'none' or dct.get('vcodec') == 'none':
2102 dct['downloader_options'] = {
2103 # Youtube throttles chunks >~10M
2104 'http_chunk_size': 10485760,
2109 url_or_none(try_get(
2111 lambda x: x['streamingData']['hlsManifestUrl'],
2113 or url_or_none(try_get(
2114 video_info, lambda x: x['hlsvp'][0], compat_str)))
2117 m3u8_formats = self._extract_m3u8_formats(
2118 manifest_url, video_id, 'mp4', fatal=False)
2119 for a_format in m3u8_formats:
2120 itag = self._search_regex(
2121 r'/itag/(\d+)/', a_format['url'], 'itag', default=None)
2123 a_format['format_id'] = itag
2124 if itag in self._formats:
2125 dct = self._formats[itag].copy()
2126 dct.update(a_format)
2128 a_format['player_url'] = player_url
2129 # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
2130 a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
2131 formats.append(a_format)
2133 error_message = extract_unavailable_message()
2134 if not error_message:
2135 error_message = clean_html(try_get(
2136 player_response, lambda x: x['playabilityStatus']['reason'],
2138 if not error_message:
2139 error_message = clean_html(
2140 try_get(video_info, lambda x: x['reason'][0], compat_str))
2142 raise ExtractorError(error_message, expected=True)
2143 raise ExtractorError('no conn, hlsvp, hlsManifestUrl or url_encoded_fmt_stream_map information found in video info')
2146 video_uploader = try_get(
2147 video_info, lambda x: x['author'][0],
2148 compat_str) or str_or_none(video_details.get('author'))
2150 video_uploader = compat_urllib_parse_unquote_plus(video_uploader)
2152 self._downloader.report_warning('unable to extract uploader name')
2155 video_uploader_id = None
2156 video_uploader_url = None
2158 r'<link itemprop="url" href="(?P<uploader_url>https?://www\.youtube\.com/(?:user|channel)/(?P<uploader_id>[^"]+))">',
2160 if mobj is not None:
2161 video_uploader_id = mobj.group('uploader_id')
2162 video_uploader_url = mobj.group('uploader_url')
2164 self._downloader.report_warning('unable to extract uploader nickname')
2167 str_or_none(video_details.get('channelId'))
2168 or self._html_search_meta(
2169 'channelId', video_webpage, 'channel id', default=None)
2170 or self._search_regex(
2171 r'data-channel-external-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
2172 video_webpage, 'channel id', default=None, group='id'))
2173 channel_url = 'http://www.youtube.com/channel/%s' % channel_id if channel_id else None
2176 # We try first to get a high quality image:
2177 m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
2178 video_webpage, re.DOTALL)
2179 if m_thumb is not None:
2180 video_thumbnail = m_thumb.group(1)
2181 elif 'thumbnail_url' not in video_info:
2182 self._downloader.report_warning('unable to extract video thumbnail')
2183 video_thumbnail = None
2184 else: # don't panic if we can't find it
2185 video_thumbnail = compat_urllib_parse_unquote_plus(video_info['thumbnail_url'][0])
2188 upload_date = self._html_search_meta(
2189 'datePublished', video_webpage, 'upload date', default=None)
2191 upload_date = self._search_regex(
2192 [r'(?s)id="eow-date.*?>(.*?)</span>',
2193 r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'],
2194 video_webpage, 'upload date', default=None)
2195 upload_date = unified_strdate(upload_date)
2197 video_license = self._html_search_regex(
2198 r'<h4[^>]+class="title"[^>]*>\s*License\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li',
2199 video_webpage, 'license', default=None)
2201 m_music = re.search(
2203 <h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*
2211 \bhref=["\']/red[^>]*>| # drop possible
2212 >\s*Listen ad-free with YouTube Red # YouTube Red ad
2219 video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
2220 video_creator = clean_html(m_music.group('creator'))
2222 video_alt_title = video_creator = None
def extract_meta(field):
    """Return the first list item under the ``<h4 class="title">`` heading
    named *field* on the watch page, or None when no such row is found."""
    pattern = (
        r'<h4[^>]+class="title"[^>]*>\s*%s\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li>\s*'
        % field)
    return self._html_search_regex(
        pattern, video_webpage, field, default=None)
2229 track = extract_meta('Song')
2230 artist = extract_meta('Artist')
2231 album = extract_meta('Album')
# Youtube Music Auto-generated description ("Provided to YouTube by ...").
# It fills in whichever of track/artist/album the page markup did not
# supply and provides the release date/year.
release_date = release_year = None
if video_description:
    mobj = re.search(r'(?s)Provided to YouTube by [^\n]+\n+(?P<track>[^·]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?', video_description)
    if mobj:
        if not track:
            track = mobj.group('track').strip()
        if not artist:
            # Prefer the explicit "Artist:" credit line; otherwise join
            # the "·"-separated names from the header line.
            artist = mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·'))
        if not album:
            # strip() has to be applied to the captured text; applying it
            # to the group-name literal left stray whitespace on the album.
            album = mobj.group('album').strip()
        release_year = mobj.group('release_year')
        release_date = mobj.group('release_date')
        if release_date:
            # YYYY-MM-DD -> YYYYMMDD
            release_date = release_date.replace('-', '')
            if not release_year:
                release_year = int(release_date[:4])
        if release_year:
            release_year = int(release_year)
2253 m_episode = re.search(
2254 r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
2257 series = unescapeHTML(m_episode.group('series'))
2258 season_number = int(m_episode.group('season'))
2259 episode_number = int(m_episode.group('episode'))
2261 series = season_number = episode_number = None
2263 m_cat_container = self._search_regex(
2264 r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
2265 video_webpage, 'categories', default=None)
2267 category = self._html_search_regex(
2268 r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
2270 video_categories = None if category is None else [category]
2272 video_categories = None
2275 unescapeHTML(m.group('content'))
2276 for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
def _extract_count(count_name):
    """Read the number shown on the ``-<count_name>-button`` element of the
    watch page and convert it with str_to_int (the regex result is None
    when the button is not found)."""
    pattern = (
        r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>'
        % re.escape(count_name))
    raw_count = self._search_regex(
        pattern, video_webpage, count_name, default=None)
    return str_to_int(raw_count)
2284 like_count = _extract_count('like')
2285 dislike_count = _extract_count('dislike')
2287 if view_count is None:
2288 view_count = str_to_int(self._search_regex(
2289 r'<[^>]+class=["\']watch-view-count[^>]+>\s*([\d,\s]+)', video_webpage,
2290 'view count', default=None))
2293 float_or_none(video_details.get('averageRating'))
2294 or try_get(video_info, lambda x: float_or_none(x['avg_rating'][0])))
2297 video_subtitles = self.extract_subtitles(video_id, video_webpage)
2298 automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
2300 video_duration = try_get(
2301 video_info, lambda x: int_or_none(x['length_seconds'][0]))
2302 if not video_duration:
2303 video_duration = int_or_none(video_details.get('lengthSeconds'))
2304 if not video_duration:
2305 video_duration = parse_duration(self._html_search_meta(
2306 'duration', video_webpage, 'video duration'))
2309 video_annotations = None
2310 if self._downloader.params.get('writeannotations', False):
2311 xsrf_token = self._search_regex(
2312 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>[A-Za-z0-9+/=]+)\2',
2313 video_webpage, 'xsrf token', group='xsrf_token', fatal=False)
2314 invideo_url = try_get(
2315 player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
2316 if xsrf_token and invideo_url:
2317 xsrf_field_name = self._search_regex(
2318 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
2319 video_webpage, 'xsrf field name',
2320 group='xsrf_field_name', default='session_token')
2321 video_annotations = self._download_webpage(
2322 self._proto_relative_url(invideo_url),
2323 video_id, note='Downloading annotations',
2324 errnote='Unable to download video annotations', fatal=False,
2325 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
2327 chapters = self._extract_chapters(description_original, video_duration)
2329 # Look for the DASH manifest
2330 if self._downloader.params.get('youtube_include_dash_manifest', True):
2331 dash_mpd_fatal = True
2332 for mpd_url in dash_mpds:
def decrypt_sig(mobj):
    """re.sub callback for DASH manifest URLs.

    Replaces an encrypted ``/s/<sig>`` path segment with the decrypted
    ``/signature/<sig>`` form.
    """
    # The substitution pattern has a single capture group: the encrypted
    # signature itself.
    s = mobj.group(1)
    dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
    return '/signature/%s' % dec_s
2340 mpd_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, mpd_url)
2342 for df in self._extract_mpd_formats(
2343 mpd_url, video_id, fatal=dash_mpd_fatal,
2344 formats_dict=self._formats):
2345 if not df.get('filesize'):
2346 df['filesize'] = _extract_filesize(df['url'])
2347 # Do not overwrite DASH format found in some previous DASH manifest
2348 if df['format_id'] not in dash_formats:
2349 dash_formats[df['format_id']] = df
2350 # Additional DASH manifests may end up in HTTP Error 403 therefore
2351 # allow them to fail without bug report message if we already have
2352 # some DASH manifest succeeded. This is temporary workaround to reduce
2353 # burst of bug reports until we figure out the reason and whether it
2354 # can be fixed at all.
2355 dash_mpd_fatal = False
2356 except (ExtractorError, KeyError) as e:
2357 self.report_warning(
2358 'Skipping DASH manifest: %r' % e, video_id)
2360 # Remove the formats we found through non-DASH, they
2361 # contain less info and it can be wrong, because we use
2362 # fixed values (for example the resolution). See
2363 # https://github.com/ytdl-org/youtube-dl/issues/5774 for an
2365 formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
2366 formats.extend(dash_formats.values())
2368 # Check for malformed aspect ratio
2369 stretched_m = re.search(
2370 r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
2373 w = float(stretched_m.group('w'))
2374 h = float(stretched_m.group('h'))
2375 # yt:stretch may hold invalid ratio data (e.g. for Q39EVAstoRM ratio is 17:0).
2376 # We will only process correct ratios.
2380 if f.get('vcodec') != 'none':
2381 f['stretched_ratio'] = ratio
2384 if 'reason' in video_info:
2385 if 'The uploader has not made this video available in your country.' in video_info['reason']:
2386 regions_allowed = self._html_search_meta(
2387 'regionsAllowed', video_webpage, default=None)
2388 countries = regions_allowed.split(',') if regions_allowed else None
2389 self.raise_geo_restricted(
2390 msg=video_info['reason'][0], countries=countries)
2391 reason = video_info['reason'][0]
2392 if 'Invalid parameters' in reason:
2393 unavailable_message = extract_unavailable_message()
2394 if unavailable_message:
2395 reason = unavailable_message
2396 raise ExtractorError(
2397 'YouTube said: %s' % reason,
2398 expected=True, video_id=video_id)
2399 if video_info.get('license_info') or try_get(player_response, lambda x: x['streamingData']['licenseInfos']):
2400 raise ExtractorError('This video is DRM protected.', expected=True)
2402 self._sort_formats(formats)
2404 self.mark_watched(video_id, video_info, player_response)
2408 'uploader': video_uploader,
2409 'uploader_id': video_uploader_id,
2410 'uploader_url': video_uploader_url,
2411 'channel_id': channel_id,
2412 'channel_url': channel_url,
2413 'upload_date': upload_date,
2414 'license': video_license,
2415 'creator': video_creator or artist,
2416 'title': video_title,
2417 'alt_title': video_alt_title or track,
2418 'thumbnail': video_thumbnail,
2419 'description': video_description,
2420 'categories': video_categories,
2422 'subtitles': video_subtitles,
2423 'automatic_captions': automatic_captions,
2424 'duration': video_duration,
2425 'age_limit': 18 if age_gate else 0,
2426 'annotations': video_annotations,
2427 'chapters': chapters,
2428 'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
2429 'view_count': view_count,
2430 'like_count': like_count,
2431 'dislike_count': dislike_count,
2432 'average_rating': average_rating,
2435 'start_time': start_time,
2436 'end_time': end_time,
2438 'season_number': season_number,
2439 'episode_number': episode_number,
2443 'release_date': release_date,
2444 'release_year': release_year,
2448 class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
2449 IE_DESC = 'YouTube.com playlists'
2450 _VALID_URL = r"""(?x)(?:
2455 youtube(?:kids)?\.com|
2460 (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/(?:videoseries|[0-9A-Za-z_-]{11}))
2461 \? (?:.*?[&;])*? (?:p|a|list)=
2464 youtu\.be/[0-9A-Za-z_-]{11}\?.*?\blist=
2467 (?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)?[0-9A-Za-z-_]{10,}
2468 # Top tracks, they can also include dots
2474 )""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
2475 _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
2476 _VIDEO_RE_TPL = r'href="\s*/watch\?v=%s(?:&(?:[^"]*?index=(?P<index>\d+))?(?:[^>]+>(?P<title>[^<]+))?)?'
2477 _VIDEO_RE = _VIDEO_RE_TPL % r'(?P<id>[0-9A-Za-z_-]{11})'
2478 IE_NAME = 'youtube:playlist'
2480 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
2482 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2483 'uploader': 'Sergey M.',
2484 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
2485 'title': 'youtube-dl public playlist',
2487 'playlist_count': 1,
2489 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
2491 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2492 'uploader': 'Sergey M.',
2493 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
2494 'title': 'youtube-dl empty playlist',
2496 'playlist_count': 0,
2498 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
2499 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2501 'title': '29C3: Not my department',
2502 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2503 'uploader': 'Christiaan008',
2504 'uploader_id': 'ChRiStIaAn008',
2506 'playlist_count': 96,
2508 'note': 'issue #673',
2509 'url': 'PLBB231211A4F62143',
2511 'title': '[OLD]Team Fortress 2 (Class-based LP)',
2512 'id': 'PLBB231211A4F62143',
2513 'uploader': 'Wickydoo',
2514 'uploader_id': 'Wickydoo',
2516 'playlist_mincount': 26,
2518 'note': 'Large playlist',
2519 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
2521 'title': 'Uploads from Cauchemar',
2522 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
2523 'uploader': 'Cauchemar',
2524 'uploader_id': 'Cauchemar89',
2526 'playlist_mincount': 799,
2528 'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
2530 'title': 'YDL_safe_search',
2531 'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
2533 'playlist_count': 2,
2534 'skip': 'This playlist is private',
2537 'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
2538 'playlist_count': 4,
2541 'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
2542 'uploader': 'milan',
2543 'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
2546 'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
2547 'playlist_mincount': 485,
2549 'title': '2018 Chinese New Singles (11/6 updated)',
2550 'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
2552 'uploader_id': 'sdragonfang',
2555 'note': 'Embedded SWF player',
2556 'url': 'https://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
2557 'playlist_count': 4,
2560 'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ',
2562 'skip': 'This playlist does not exist',
2564 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
2565 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
2567 'title': 'Uploads from Interstellar Movie',
2568 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
2569 'uploader': 'Interstellar Movie',
2570 'uploader_id': 'InterstellarMovie1',
2572 'playlist_mincount': 21,
2574 # Playlist URL that does not actually serve a playlist
2575 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
2577 'id': 'FqZTN594JQw',
2579 'title': "Smiley's People 01 detective, Adventure Series, Action",
2580 'uploader': 'STREEM',
2581 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
2582 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
2583 'upload_date': '20150526',
2584 'license': 'Standard YouTube License',
2585 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
2586 'categories': ['People & Blogs'],
2590 'dislike_count': int,
2593 'skip_download': True,
2595 'skip': 'This video is not available.',
2596 'add_ie': [YoutubeIE.ie_key()],
2598 'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
2600 'id': 'yeWKywCrFtk',
2602 'title': 'Small Scale Baler and Braiding Rugs',
2603 'uploader': 'Backus-Page House Museum',
2604 'uploader_id': 'backuspagemuseum',
2605 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
2606 'upload_date': '20161008',
2607 'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
2608 'categories': ['Nonprofits & Activism'],
2611 'dislike_count': int,
2615 'skip_download': True,
2618 # https://github.com/ytdl-org/youtube-dl/issues/21844
2619 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2621 'title': 'Data Analysis with Dr Mike Pound',
2622 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2623 'uploader_id': 'Computerphile',
2624 'uploader': 'Computerphile',
2626 'playlist_mincount': 11,
2628 'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
2629 'only_matching': True,
2631 'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
2632 'only_matching': True,
2634 # music album playlist
2635 'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
2636 'only_matching': True,
2638 'url': 'https://invidio.us/playlist?list=PLDIoUOhQQPlXr63I_vwF9GD8sAKh77dWU',
2639 'only_matching': True,
2641 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
2642 'only_matching': True,
2645 def _real_initialize(self):
def extract_videos_from_page(self, page):
    """Collect (video id, title) pairs from the HTML of a listing *page*.

    Elements carrying a ``data-video-id`` attribute are read first; the
    regex-based fallbacks then get the same accumulator lists so they can
    add to them.  Titles may be None when no ``data-title`` is present.

    Returns the ids zipped with their titles.
    """
    ids_in_page = []
    titles_in_page = []

    for item in re.findall(
            r'(<[^>]*\bdata-video-id\s*=\s*["\'][0-9A-Za-z_-]{11}[^>]+>)', page):
        attrs = extract_attributes(item)
        video_id = attrs['data-video-id']
        video_title = unescapeHTML(attrs.get('data-title'))
        # data-title is optional, so only strip an actual title
        if video_title:
            video_title = video_title.strip()
        ids_in_page.append(video_id)
        titles_in_page.append(video_title)

    # Fallback with old _VIDEO_RE
    self.extract_videos_from_page_impl(
        self._VIDEO_RE, page, ids_in_page, titles_in_page)

    # Relaxed fallbacks
    self.extract_videos_from_page_impl(
        r'href="\s*/watch\?v\s*=\s*(?P<id>[0-9A-Za-z_-]{11})', page,
        ids_in_page, titles_in_page)
    self.extract_videos_from_page_impl(
        r'data-video-ids\s*=\s*["\'](?P<id>[0-9A-Za-z_-]{11})', page,
        ids_in_page, titles_in_page)

    return zip(ids_in_page, titles_in_page)
def _extract_mix(self, playlist_id):
    """Extract a mix playlist by walking its watch pages.

    Starting from the video id embedded in the last 11 characters of
    *playlist_id*, each iteration loads the watch page of the most recently
    collected video and gathers the ids listed for this playlist.  The walk
    stops once a page contributes no id that has not been seen already.

    Returns a playlist result titled from the last downloaded page.
    """
    # The mixes are generated from a single video
    # the id of the playlist is just 'RD' + video_id
    ids = []
    last_id = playlist_id[-11:]
    for n in itertools.count(1):
        url = 'https://youtube.com/watch?v=%s&list=%s' % (last_id, playlist_id)
        webpage = self._download_webpage(
            url, playlist_id, 'Downloading page {0} of Youtube mix'.format(n))
        new_ids = orderedSet(re.findall(
            r'''(?xs)data-video-username=".*?".*?
                       href="/watch\?v=([0-9A-Za-z_-]{11})&[^"]*?list=%s''' % re.escape(playlist_id),
            webpage))
        # Fetch new pages until all the videos are repeated, it seems that
        # there are always 51 unique videos.
        new_ids = [_id for _id in new_ids if _id not in ids]
        if not new_ids:
            break
        ids.extend(new_ids)
        last_id = ids[-1]

    url_results = self._ids_to_results(ids)

    # Try progressively more generic class names for the title element
    search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage)
    title_span = (
        search_title('playlist-title')
        or search_title('title long-title')
        or search_title('title'))
    title = clean_html(title_span)

    return self.playlist_result(url_results, playlist_id, title)
def _extract_playlist(self, playlist_id):
    """Download the playlist page and build its playlist result.

    Alert banners on the page are inspected first: a missing or private
    playlist and an "Invalid parameters" alert raise an expected
    ExtractorError, the language chooser is ignored, and anything else is
    reported as a warning.

    Returns a ``(has_videos, playlist)`` tuple.  ``has_videos`` is False
    only when the page has no title and its entries iterator is empty,
    i.e. the URL did not actually serve a playlist.
    """
    url = self._TEMPLATE_URL % playlist_id
    page = self._download_webpage(url, playlist_id)

    # the yt-alert-message now has tabindex attribute (see https://github.com/ytdl-org/youtube-dl/issues/11604)
    for match in re.findall(r'<div class="yt-alert-message"[^>]*>([^<]+)</div>', page):
        match = match.strip()
        # Check if the playlist exists or is private
        mobj = re.match(r'[^<]*(?:The|This) playlist (?P<reason>does not exist|is private)[^<]*', match)
        if mobj:
            reason = mobj.group('reason')
            message = 'This playlist %s' % reason
            if 'private' in reason:
                message += ', use --username or --netrc to access it'
            message += '.'
            raise ExtractorError(message, expected=True)
        elif re.match(r'[^<]*Invalid parameters[^<]*', match):
            raise ExtractorError(
                'Invalid parameters. Maybe URL is incorrect.',
                expected=True)
        elif re.match(r'[^<]*Choose your language[^<]*', match):
            continue
        else:
            self.report_warning('Youtube gives an alert message: ' + match)

    playlist_title = self._html_search_regex(
        r'(?s)<h1 class="pl-header-title[^"]*"[^>]*>\s*(.*?)\s*</h1>',
        page, 'title', default=None)

    _UPLOADER_BASE = r'class=["\']pl-header-details[^>]+>\s*<li>\s*<a[^>]+\bhref='
    uploader = self._html_search_regex(
        r'%s["\']/(?:user|channel)/[^>]+>([^<]+)' % _UPLOADER_BASE,
        page, 'uploader', default=None)
    mobj = re.search(
        r'%s(["\'])(?P<path>/(?:user|channel)/(?P<uploader_id>.+?))\1' % _UPLOADER_BASE,
        page)
    if mobj:
        uploader_id = mobj.group('uploader_id')
        uploader_url = compat_urlparse.urljoin(url, mobj.group('path'))
    else:
        uploader_id = uploader_url = None

    has_videos = True

    if not playlist_title:
        try:
            # Some playlist URLs don't actually serve a playlist (e.g.
            # https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4)
            next(self._entries(page, playlist_id))
        except StopIteration:
            has_videos = False

    playlist = self.playlist_result(
        self._entries(page, playlist_id), playlist_id, playlist_title)
    playlist.update({
        'uploader': uploader,
        'uploader_id': uploader_id,
        'uploader_url': uploader_url,
    })

    return has_videos, playlist
def _check_download_just_video(self, url, playlist_id):
    """Decide whether *url* should be handled as a single video.

    The video id is taken from the ``v`` query parameter, or from a
    youtu.be / embed style path.

    Returns a ``(video_id, result)`` tuple:
    * ``(video_id, url_result)`` when a video id is present and the
      ``noplaylist`` option is set;
    * ``(video_id, None)`` when a video id is present but the playlist is
      to be downloaded;
    * ``(None, None)`` when the URL names no video.
    """
    # Check if it's a video-specific URL
    query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
    video_id = query_dict.get('v', [None])[0] or self._search_regex(
        r'(?:(?:^|//)youtu\.be/|youtube\.com/embed/(?!videoseries))([0-9A-Za-z_-]{11})', url,
        'video id', default=None)
    if video_id:
        if self._downloader.params.get('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
            return video_id, self.url_result(video_id, 'Youtube', video_id=video_id)
        else:
            self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
            return video_id, None
    # Callers unpack two values, so always hand back a pair
    return None, None
def _real_extract(self, url):
    """Extract the playlist named by *url*.

    Order of precedence: a single-video result when
    _check_download_just_video produces one; the mix walker for ids
    starting with RD, UL or PU; the regular playlist page; and finally a
    plain video result when the page served no playlist but the URL carried
    a video id.

    Raises ExtractorError when *url* does not match _VALID_URL.
    """
    # Extract playlist id
    mobj = re.match(self._VALID_URL, url)
    if mobj is None:
        raise ExtractorError('Invalid URL: %s' % url)
    playlist_id = mobj.group(1) or mobj.group(2)

    video_id, video = self._check_download_just_video(url, playlist_id)
    if video:
        return video

    if playlist_id.startswith(('RD', 'UL', 'PU')):
        # Mixes require a custom extraction process
        return self._extract_mix(playlist_id)

    has_videos, playlist = self._extract_playlist(playlist_id)
    if has_videos or not video_id:
        return playlist

    # Some playlist URLs don't actually serve a playlist (see
    # https://github.com/ytdl-org/youtube-dl/issues/10537).
    # Fallback to plain video extraction if there is a video id
    # along with playlist id.
    return self.url_result(video_id, 'Youtube', video_id=video_id)
2811 class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
2812 IE_DESC = 'YouTube.com channels'
2813 _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie|kids)?\.com|(?:www\.)?invidio\.us)/channel/(?P<id>[0-9A-Za-z_-]+)'
2814 _TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos'
2815 _VIDEO_RE = r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?'
2816 IE_NAME = 'youtube:channel'
2818 'note': 'paginated channel',
2819 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
2820 'playlist_mincount': 91,
2822 'id': 'UUKfVa3S1e4PHvxWcwyMMg8w',
2823 'title': 'Uploads from lex will',
2824 'uploader': 'lex will',
2825 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2828 'note': 'Age restricted channel',
2829 # from https://www.youtube.com/user/DeusExOfficial
2830 'url': 'https://www.youtube.com/channel/UCs0ifCMCm1icqRbqhUINa0w',
2831 'playlist_mincount': 64,
2833 'id': 'UUs0ifCMCm1icqRbqhUINa0w',
2834 'title': 'Uploads from Deus Ex',
2835 'uploader': 'Deus Ex',
2836 'uploader_id': 'DeusExOfficial',
2839 'url': 'https://invidio.us/channel/UC23qupoDRn9YOAVzeoxjOQA',
2840 'only_matching': True,
2842 'url': 'https://www.youtubekids.com/channel/UCyu8StPfZWapR6rfW_JgqcA',
2843 'only_matching': True,
def suitable(cls, url):
    """Tell whether this extractor should handle *url*.

    URLs accepted by YoutubePlaylistsIE or YoutubeLiveIE are rejected here;
    everything else is decided by the parent class' suitable().
    NOTE(review): presumably bound as a classmethod by a decorator outside
    this span -- confirm.
    """
    if YoutubePlaylistsIE.suitable(url) or YoutubeLiveIE.suitable(url):
        return False
    return super(YoutubeChannelIE, cls).suitable(url)
def _build_template_url(self, url, channel_id):
    """Fill *channel_id* into this extractor's _TEMPLATE_URL.

    *url* is not used by this implementation.
    """
    template = self._TEMPLATE_URL
    return template % channel_id
def _real_extract(self, url):
    """Extract all videos of the channel named by *url*.

    A channel id starting with ``UC`` is first looked up on the channel
    page (meta ``channelId``, or an app-link meta URL); when found, the
    extraction is delegated to the matching ``UU...`` uploads playlist.
    Otherwise the channel's video listing is scraped: auto-generated
    channels from a single page, all others through the paged _entries
    iterator.

    Raises ExtractorError with the site's alert text when the listing is
    empty and the page shows an alert message.
    """
    channel_id = self._match_id(url)

    url = self._build_template_url(url, channel_id)

    # Channel by page listing is restricted to 35 pages of 30 items, i.e. 1050 videos total (see #5778)
    # Workaround by extracting as a playlist if managed to obtain channel playlist URL
    # otherwise fallback on channel by page extraction
    channel_page = self._download_webpage(
        url + '?view=57', channel_id,
        'Downloading channel page', fatal=False)
    if channel_page is False:
        channel_playlist_id = False
    else:
        channel_playlist_id = self._html_search_meta(
            'channelId', channel_page, 'channel id', default=None)
        if not channel_playlist_id:
            channel_url = self._html_search_meta(
                ('al:ios:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad'),
                channel_page, 'channel url', default=None)
            if channel_url:
                channel_playlist_id = self._search_regex(
                    r'vnd\.youtube://user/([0-9A-Za-z_-]+)',
                    channel_url, 'channel id', default=None)
    if channel_playlist_id and channel_playlist_id.startswith('UC'):
        # UC<suffix> channel -> UU<suffix> uploads playlist
        playlist_id = 'UU' + channel_playlist_id[2:]
        return self.url_result(
            compat_urlparse.urljoin(url, '/playlist?list=%s' % playlist_id), 'YoutubePlaylist')

    channel_page = self._download_webpage(url, channel_id, 'Downloading page #1')
    autogenerated = re.search(r'''(?x)
            class="[^"]*?(?:
                channel-header-autogenerated-label|
                yt-channel-title-autogenerated
            )[^"]*"''', channel_page) is not None

    if autogenerated:
        # The videos are contained in a single page
        # the ajax pages can't be used, they are empty
        entries = [
            self.url_result(
                video_id, 'Youtube', video_id=video_id,
                video_title=video_title)
            for video_id, video_title in self.extract_videos_from_page(channel_page)]
        return self.playlist_result(entries, channel_id)

    try:
        # Probe for at least one entry so an alert can be surfaced instead
        # of silently returning an empty playlist
        next(self._entries(channel_page, channel_id))
    except StopIteration:
        alert_message = self._html_search_regex(
            r'(?s)<div[^>]+class=(["\']).*?\byt-alert-message\b.*?\1[^>]*>(?P<alert>[^<]+)</div>',
            channel_page, 'alert', default=None, group='alert')
        if alert_message:
            raise ExtractorError('Youtube said: %s' % alert_message, expected=True)

    return self.playlist_result(self._entries(channel_page, channel_id), channel_id)
class YoutubeUserIE(YoutubeChannelIE):
    """Extractor for user pages (``/user/<name>``, ``/c/<name>``, bare
    ``/<name>`` URLs and the ``ytuser:`` keyword).

    Extraction itself is inherited; this class only supplies the URL
    pattern, the template URL and a stricter ``suitable`` check.
    """
    IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:(?P<user>user|c)/)?(?!(?:attribution_link|watch|results|shared)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/%s/%s/videos'
    IE_NAME = 'youtube:user'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheLinuxFoundation',
        'playlist_mincount': 320,
        'info_dict': {
            'id': 'UUfX55Sx5hEFjoC3cNs6mCUQ',
            'title': 'Uploads from The Linux Foundation',
            'uploader': 'The Linux Foundation',
            'uploader_id': 'TheLinuxFoundation',
        }
    }, {
        # Only available via https://www.youtube.com/c/12minuteathlete/videos
        # but not https://www.youtube.com/user/12minuteathlete/videos
        'url': 'https://www.youtube.com/c/12minuteathlete/videos',
        'playlist_mincount': 249,
        'info_dict': {
            'id': 'UUVjM-zV6_opMDx7WYxnjZiQ',
            'title': 'Uploads from 12 Minute Athlete',
            'uploader': '12 Minute Athlete',
            'uploader_id': 'the12minuteathlete',
        }
    }, {
        'url': 'ytuser:phihag',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/gametrailers',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/gametrailers',
        'only_matching': True,
    }, {
        # This channel is not available, geo restricted to JP
        'url': 'https://www.youtube.com/user/kananishinoSMEJ/videos',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Return True only if ``url`` matches and no other Youtube*IE claims it."""
        # Don't return True if the url can be extracted with another youtube
        # extractor: this regex is too permissive and would match as well.
        other_yt_ies = (
            klass for (name, klass) in globals().items()
            if name.startswith('Youtube') and name.endswith('IE') and klass is not cls)
        if any(ie.suitable(url) for ie in other_yt_ies):
            return False
        return super(YoutubeUserIE, cls).suitable(url)

    def _build_template_url(self, url, channel_id):
        """Build the videos URL from the path kind and id found in ``url``.

        The ``user`` group is optional in ``_VALID_URL`` (bare names and the
        ``ytuser:`` form have none), in which case ``'user'`` is assumed.
        ``channel_id`` is unused; the id is re-read from ``url``.
        """
        mobj = re.match(self._VALID_URL, url)
        return self._TEMPLATE_URL % (mobj.group('user') or 'user', mobj.group('id'))
class YoutubeLiveIE(YoutubeBaseInfoExtractor):
    """Extractor for ``.../live`` URLs of a user or channel.

    Resolves to the currently advertised video when the page exposes one,
    otherwise hands the base (non-``/live``) URL back for further dispatch.
    """
    IE_DESC = 'YouTube.com live streams'
    _VALID_URL = r'(?P<base_url>https?://(?:\w+\.)?youtube\.com/(?:(?:user|channel|c)/)?(?P<id>[^/]+))/live'
    IE_NAME = 'youtube:live'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheYoungTurks/live',
        'info_dict': {
            'id': 'a48o2S1cPoo',
            'ext': 'mp4',
            'title': 'The Young Turks - Live Main Show',
            'uploader': 'The Young Turks',
            'uploader_id': 'TheYoungTurks',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
            'upload_date': '20150715',
            'license': 'Standard YouTube License',
            'description': 'md5:438179573adcdff3c97ebb1ee632b891',
            'categories': ['News & Politics'],
            'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/TheYoungTurks/live',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a url_result for the live video, or for the base URL."""
        mobj = re.match(self._VALID_URL, url)
        channel_id = mobj.group('id')
        base_url = mobj.group('base_url')
        # The download is non-fatal: on failure the falsy result skips the
        # page inspection below and the base URL is returned instead.
        webpage = self._download_webpage(url, channel_id, fatal=False)
        if webpage:
            page_type = self._og_search_property(
                'type', webpage, 'page type', default='')
            video_id = self._html_search_meta(
                'videoId', webpage, 'video id', default=None)
            # Only trust the meta tag when it looks like an 11-char video id.
            if page_type.startswith('video') and video_id and re.match(
                    r'^[0-9A-Za-z_-]{11}$', video_id):
                return self.url_result(video_id, YoutubeIE.ie_key())
        return self.url_result(base_url)
class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
    """Extractor for the ``/playlists`` tab of a user or channel.

    Only declarative configuration lives here; the extraction logic is
    inherited from the base class.
    """
    IE_DESC = 'YouTube.com user/channel playlists'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/(?:user|channel)/(?P<id>[^/]+)/playlists'
    IE_NAME = 'youtube:playlists'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
        'playlist_mincount': 4,
        'info_dict': {
            'id': 'ThirstForScience',
            'title': 'ThirstForScience',
        },
    }, {
        # with "Load more" button
        'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
        'playlist_mincount': 70,
        'info_dict': {
            'id': 'igorkle1',
            'title': 'Игорь Клейнер',
        },
    }, {
        'url': 'https://www.youtube.com/channel/UCiU1dHvZObB2iP6xkJ__Icw/playlists',
        'playlist_mincount': 17,
        'info_dict': {
            'id': 'UCiU1dHvZObB2iP6xkJ__Icw',
            'title': 'Chem Player',
        },
        'skip': 'Blocked',
    }]
class YoutubeSearchBaseInfoExtractor(YoutubePlaylistBaseInfoExtractor):
    # Matches a '/watch?v=<11-char id>' href and, optionally, the title
    # attribute that follows it in the same tag (named groups: id, title).
    # NOTE(review): presumably consumed by the inherited page-processing
    # helpers of the base class - confirm against YoutubePlaylistBaseInfoExtractor.
    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})(?:[^"]*"[^>]+\btitle="(?P<title>[^"]+))?'
class YoutubeSearchIE(SearchInfoExtractor, YoutubeSearchBaseInfoExtractor):
    """Keyword search extractor (``ytsearch`` prefix) scraping the result pages."""
    IE_DESC = 'YouTube.com searches'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    # Extra query-string parameters merged into the search request;
    # subclasses override this to alter the search (e.g. sort order).
    _EXTRA_QUERY_ARGS = {}
    _TESTS = []

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query

        Result pages are fetched one after another, following the "Next"
        link, until ``n`` videos were collected, a page contributes no new
        videos, or no next link is present.

        Raises:
            ExtractorError: (expected) when the result page contains a
                ``search-message`` element, i.e. there are no results.
        """
        videos = []

        url_query = {
            'search_query': query.encode('utf-8'),
        }
        url_query.update(self._EXTRA_QUERY_ARGS)
        result_url = 'https://www.youtube.com/results?' + compat_urllib_parse_urlencode(url_query)

        for pagenum in itertools.count(1):
            data = self._download_json(
                result_url, video_id='query "%s"' % query,
                note='Downloading page %s' % pagenum,
                errnote='Unable to download API page',
                query={'spf': 'navigate'})
            html_content = data[1]['body']['content']

            if 'class="search-message' in html_content:
                raise ExtractorError(
                    '[youtube] No video results', expected=True)

            new_videos = list(self._process_page(html_content))
            videos += new_videos
            if not new_videos or len(videos) > n:
                break
            next_link = self._html_search_regex(
                r'href="(/results\?[^"]*\bsp=[^"]+)"[^>]*>\s*<span[^>]+class="[^"]*\byt-uix-button-content\b[^"]*"[^>]*>Next',
                html_content, 'next link', default=None)
            if next_link is None:
                break
            result_url = compat_urlparse.urljoin('https://www.youtube.com/', next_link)

        # Guarded slice: n may be float('inf') (see _MAX_RESULTS), which is
        # not a valid slice bound, so only truncate when there is an excess.
        if len(videos) > n:
            videos = videos[:n]
        return self.playlist_result(videos, query)
class YoutubeSearchDateIE(YoutubeSearchIE):
    # Same search as YoutubeSearchIE, with an extra query argument asking
    # for results sorted by upload date; YoutubeSearchIE._get_n_results
    # merges _EXTRA_QUERY_ARGS into the request's query string.
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    _SEARCH_KEY = 'ytsearchdate'
    IE_DESC = 'YouTube.com searches, newest videos first'
    _EXTRA_QUERY_ARGS = {'search_sort': 'video_date_uploaded'}
class YoutubeSearchURLIE(YoutubeSearchBaseInfoExtractor):
    """Extractor for search result page URLs (``/results?search_query=...``)."""
    IE_DESC = 'YouTube.com search URLs'
    IE_NAME = 'youtube:search_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the videos listed on the given results page as a playlist.

        The playlist title is the URL-decoded search query; the decoded
        query also serves as the display id while downloading.
        """
        mobj = re.match(self._VALID_URL, url)
        query = compat_urllib_parse_unquote_plus(mobj.group('query'))
        webpage = self._download_webpage(url, query)
        return self.playlist_result(self._process_page(webpage), playlist_title=query)
class YoutubeShowIE(YoutubePlaylistsBaseInfoExtractor):
    """Extractor for ``/show/<id>`` pages."""
    IE_DESC = 'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/show/(?P<id>[^?#]*)'
    IE_NAME = 'youtube:show'
    _TESTS = [{
        'url': 'https://www.youtube.com/show/airdisasters',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'airdisasters',
            'title': 'Air Disasters',
        }
    }]

    def _real_extract(self, url):
        """Delegate to the base extractor using the show's ``/playlists`` URL."""
        playlist_id = self._match_id(url)
        return super(YoutubeShowIE, self)._real_extract(
            'https://www.youtube.com/show/%s/playlists' % playlist_id)
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
    """
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        """Extractor name derived from the subclass's ``_FEED_NAME``."""
        return 'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        self._login()

    def _entries(self, page):
        """Yield url results for every new video id found in the feed.

        ``page`` is the HTML of the first feed page.  Further portions are
        fetched through the "load more" link until a portion contains no
        unseen ids or no such link is present.
        """
        # The extraction process is the same as for playlists, but the regex
        # for the video ids doesn't contain an index
        seen_ids = set()
        more_widget_html = content_html = page
        for page_num in itertools.count(1):
            matches = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)

            # 'recommended' feed has infinite 'load more' and each new portion spins
            # the same videos in (sometimes) slightly different order, so we'll check
            # for unicity and break when portion has no new videos
            new_ids = [
                video_id for video_id in orderedSet(matches)
                if video_id not in seen_ids]
            if not new_ids:
                break

            # A set keeps the membership test above constant-time per id.
            seen_ids.update(new_ids)

            for entry in self._ids_to_results(new_ids):
                yield entry

            mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
            if not mobj:
                break

            more = self._download_json(
                'https://youtube.com/%s' % mobj.group('more'), self._PLAYLIST_TITLE,
                'Downloading page #%s' % page_num,
                transform_source=uppercase_escape)
            content_html = more['content_html']
            more_widget_html = more['load_more_widget_html']

    def _real_extract(self, url):
        """Download the feed page and return its videos as a playlist.

        ``url`` is ignored; the page is always built from ``_FEED_NAME``.
        """
        page = self._download_webpage(
            'https://www.youtube.com/feed/%s' % self._FEED_NAME,
            self._PLAYLIST_TITLE)
        return self.playlist_result(
            self._entries(page), playlist_title=self._PLAYLIST_TITLE)
class YoutubeWatchLaterIE(YoutubePlaylistIE):
    """Extractor for the "watch later" list (playlist id ``WL``)."""
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:feed/watch_later|(?:playlist|watch)\?(?:.+&)?list=WL)|:ytwatchlater'

    _TESTS = [{
        'url': 'https://www.youtube.com/playlist?list=WL',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?v=bCNU9TrbiRk&index=1&list=WL',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the single video when one is selected, else the WL playlist."""
        # Only the second element of each returned pair is used here.
        _, video = self._check_download_just_video(url, 'WL')
        if video:
            return video
        _, playlist = self._extract_playlist('WL')
        return playlist
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Extractor for the logged-in user's favourites (``:ytfav`` keyword)."""
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
    _LOGIN_REQUIRED = True

    def _real_extract(self, url):
        """Resolve the favourites page to its playlist id and delegate.

        ``url`` is ignored: the favourites page is always fetched from its
        fixed address, and the first ``list=`` value found in it is handed
        to the playlist extractor.
        """
        favourites_page = self._download_webpage(
            'https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        favourites_list_id = self._search_regex(
            r'list=(.+?)["&]', favourites_page, 'favourites playlist id')
        return self.url_result(favourites_list_id, 'YoutubePlaylist')
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    # Feed extractor for the 'recommended' feed.  YoutubeFeedsInfoExtractor
    # interpolates _FEED_NAME into IE_NAME and the feed URL, and uses
    # _PLAYLIST_TITLE as the resulting playlist title.
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/recommended|:ytrec(?:ommended)?'
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = 'Youtube Recommended videos'
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    # Feed extractor for the 'subscriptions' feed.  YoutubeFeedsInfoExtractor
    # interpolates _FEED_NAME into IE_NAME and the feed URL, and uses
    # _PLAYLIST_TITLE as the resulting playlist title.
    IE_DESC = 'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = 'Youtube Subscriptions'
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    # Feed extractor for the 'history' feed.  YoutubeFeedsInfoExtractor
    # interpolates _FEED_NAME into IE_NAME and the feed URL, and uses
    # _PLAYLIST_TITLE as the resulting playlist title.
    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/history|:ythistory'
    _FEED_NAME = 'history'
    _PLAYLIST_TITLE = 'Youtube History'
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch watch/attribution URLs that lost their video id.

    The pattern only matches URLs whose query string is empty or consists of
    a single non-identifying parameter, which typically happens when an
    unquoted ``&`` is interpreted by the shell.  Extraction always fails
    with an explanatory error.
    """
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an (expected) ExtractorError explaining the problem."""
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply youtube-dl BaW_jenozKc .',
            expected=True)
3309 class YoutubeTruncatedIDIE(InfoExtractor):
3310 IE_NAME = 'youtube:truncated_id'
3311 IE_DESC = False # Do not list
3312 _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'
3315 'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
3316 'only_matching': True,
3319 def _real_extract(self, url):
3320 video_id = self._match_id(url)
3321 raise ExtractorError(
3322 'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url),