3 from __future__ import unicode_literals
14 from .common import InfoExtractor, SearchInfoExtractor
15 from ..jsinterp import JSInterpreter
16 from ..swfinterp import SWFInterpreter
17 from ..compat import (
22 compat_urllib_parse_unquote,
23 compat_urllib_parse_unquote_plus,
24 compat_urllib_parse_urlencode,
25 compat_urllib_parse_urlparse,
37 get_element_by_attribute,
60 class YoutubeBaseInfoExtractor(InfoExtractor):
61 """Provide base functions for Youtube extractors"""
62 _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
63 _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'
65 _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
66 _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
67 _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'
69 _NETRC_MACHINE = 'youtube'
70 # If True it will raise an error if no login info is provided
71 _LOGIN_REQUIRED = False
73 _PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL|OLAK5uy_)[0-9A-Za-z-_]{10,}'
75 def _set_language(self):
77 '.youtube.com', 'PREF', 'f1=50000000&hl=en',
78 # YouTube sets the expire time to about two months
79 expire_time=time.time() + 2 * 30 * 24 * 3600)
81 def _ids_to_results(self, ids):
83 self.url_result(vid_id, 'Youtube', video_id=vid_id)
88 Attempt to log in to YouTube.
89 True is returned if successful or skipped.
90 False is returned if login failed.
92 If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
94 username, password = self._get_login_info()
95 # No authentication to be performed
97 if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
98 raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
101 login_page = self._download_webpage(
102 self._LOGIN_URL, None,
103 note='Downloading login page',
104 errnote='unable to fetch login page', fatal=False)
105 if login_page is False:
108 login_form = self._hidden_inputs(login_page)
110 def req(url, f_req, note, errnote):
111 data = login_form.copy()
114 'checkConnection': 'youtube',
115 'checkedDomains': 'youtube',
117 'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
118 'f.req': json.dumps(f_req),
119 'flowName': 'GlifWebSignIn',
120 'flowEntry': 'ServiceLogin',
121 # TODO: reverse actual botguard identifier generation algo
122 'bgRequest': '["identifier",""]',
124 return self._download_json(
125 url, None, note=note, errnote=errnote,
126 transform_source=lambda s: re.sub(r'^[^[]*', '', s),
128 data=urlencode_postdata(data), headers={
129 'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
130 'Google-Accounts-XSRF': 1,
134 self._downloader.report_warning(message)
138 None, [], None, 'US', None, None, 2, False, True,
142 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
144 1, [None, None, []], None, None, None, True
149 lookup_results = req(
150 self._LOOKUP_URL, lookup_req,
151 'Looking up account info', 'Unable to look up account info')
153 if lookup_results is False:
156 user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
158 warn('Unable to extract user hash')
163 None, 1, None, [1, None, None, None, [password, None, True]],
165 None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
166 1, [None, None, []], None, None, None, True
169 challenge_results = req(
170 self._CHALLENGE_URL, challenge_req,
171 'Logging in', 'Unable to log in')
173 if challenge_results is False:
176 login_res = try_get(challenge_results, lambda x: x[0][5], list)
178 login_msg = try_get(login_res, lambda x: x[5], compat_str)
180 'Unable to login: %s' % 'Invalid password'
181 if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
184 res = try_get(challenge_results, lambda x: x[0][-1], list)
186 warn('Unable to extract result entry')
189 login_challenge = try_get(res, lambda x: x[0][0], list)
191 challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
192 if challenge_str == 'TWO_STEP_VERIFICATION':
193 # SEND_SUCCESS - TFA code has been successfully sent to phone
194 # QUOTA_EXCEEDED - reached the limit of TFA codes
195 status = try_get(login_challenge, lambda x: x[5], compat_str)
196 if status == 'QUOTA_EXCEEDED':
197 warn('Exceeded the limit of TFA codes, try later')
200 tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
202 warn('Unable to extract TL')
205 tfa_code = self._get_tfa_info('2-step verification code')
209 'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
210 '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
213 tfa_code = remove_start(tfa_code, 'G-')
216 user_hash, None, 2, None,
218 9, None, None, None, None, None, None, None,
219 [None, tfa_code, True, 2]
223 self._TFA_URL.format(tl), tfa_req,
224 'Submitting TFA code', 'Unable to submit TFA code')
226 if tfa_results is False:
229 tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
231 tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
233 'Unable to finish TFA: %s' % 'Invalid TFA code'
234 if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
237 check_cookie_url = try_get(
238 tfa_results, lambda x: x[0][-1][2], compat_str)
241 'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
242 'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
243 'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
245 challenge = CHALLENGES.get(
247 '%s returned error %s.' % (self.IE_NAME, challenge_str))
248 warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
251 check_cookie_url = try_get(res, lambda x: x[2], compat_str)
253 if not check_cookie_url:
254 warn('Unable to extract CheckCookie URL')
257 check_cookie_results = self._download_webpage(
258 check_cookie_url, None, 'Checking cookie', fatal=False)
260 if check_cookie_results is False:
263 if 'https://myaccount.google.com/' not in check_cookie_results:
264 warn('Unable to log in')
def _download_webpage_handle(self, *args, **kwargs):
    """Download a webpage, always requesting the legacy (non-polymer) layout.

    Copies the caller's query dict (so the caller's object is never
    mutated) and injects disable_polymer=true before delegating to the
    parent implementation.
    """
    merged_query = dict(kwargs.get('query', {}))
    merged_query['disable_polymer'] = 'true'
    kwargs['query'] = merged_query
    return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
        *args, **compat_kwargs(kwargs))
276 def _real_initialize(self):
277 if self._downloader is None:
280 if not self._login():
284 class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
285 # Extract entries from page with "Load more" button
286 def _entries(self, page, playlist_id):
287 more_widget_html = content_html = page
288 for page_num in itertools.count(1):
289 for entry in self._process_page(content_html):
292 mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
298 while count <= retries:
300 # Downloading page may result in intermittent 5xx HTTP error
301 # that is usually worked around with a retry
302 more = self._download_json(
303 'https://youtube.com/%s' % mobj.group('more'), playlist_id,
304 'Downloading page #%s%s'
305 % (page_num, ' (retry #%d)' % count if count else ''),
306 transform_source=uppercase_escape)
308 except ExtractorError as e:
309 if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503):
315 content_html = more['content_html']
316 if not content_html.strip():
317 # Some webpages show a "Load more" button but they don't
320 more_widget_html = more['load_more_widget_html']
323 class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
def _process_page(self, content):
    """Yield a url_result entry for every video found on the page."""
    for vid, title in self.extract_videos_from_page(content):
        yield self.url_result(vid, 'Youtube', vid, title)
328 def extract_videos_from_page_impl(self, video_re, page, ids_in_page, titles_in_page):
329 for mobj in re.finditer(video_re, page):
330 # The link with index 0 is not the first video of the playlist (not sure if still actual)
331 if 'index' in mobj.groupdict() and mobj.group('id') == '0':
333 video_id = mobj.group('id')
334 video_title = unescapeHTML(
335 mobj.group('title')) if 'title' in mobj.groupdict() else None
337 video_title = video_title.strip()
338 if video_title == '► Play all':
341 idx = ids_in_page.index(video_id)
342 if video_title and not titles_in_page[idx]:
343 titles_in_page[idx] = video_title
345 ids_in_page.append(video_id)
346 titles_in_page.append(video_title)
348 def extract_videos_from_page(self, page):
351 self.extract_videos_from_page_impl(
352 self._VIDEO_RE, page, ids_in_page, titles_in_page)
353 return zip(ids_in_page, titles_in_page)
356 class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
357 def _process_page(self, content):
358 for playlist_id in orderedSet(re.findall(
359 r'<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*><a[^>]+href="/?playlist\?list=([0-9A-Za-z-_]{10,})"',
361 yield self.url_result(
362 'https://www.youtube.com/playlist?list=%s' % playlist_id, 'YoutubePlaylist')
def _real_extract(self, url):
    """Extract all playlists linked from the page at *url* as a playlist result."""
    playlist_id = self._match_id(url)
    webpage = self._download_webpage(url, playlist_id)
    # Title is best-effort: missing og:title must not abort extraction.
    page_title = self._og_search_title(webpage, fatal=False)
    entries = self._entries(webpage, playlist_id)
    return self.playlist_result(entries, playlist_id, page_title)
371 class YoutubeIE(YoutubeBaseInfoExtractor):
372 IE_DESC = 'YouTube.com'
373 _VALID_URL = r"""(?x)^
375 (?:https?://|//) # http(s):// or protocol-independent URL
376 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|
377 (?:www\.)?deturl\.com/www\.youtube\.com/|
378 (?:www\.)?pwnyoutube\.com/|
379 (?:www\.)?hooktube\.com/|
380 (?:www\.)?yourepeat\.com/|
381 tube\.majestyc\.net/|
382 # Invidious instances taken from https://github.com/omarroth/invidious/wiki/Invidious-Instances
383 (?:(?:www|dev)\.)?invidio\.us/|
384 (?:(?:www|no)\.)?invidiou\.sh/|
385 (?:(?:www|fi|de)\.)?invidious\.snopyta\.org/|
386 (?:www\.)?invidious\.kabi\.tk/|
387 (?:www\.)?invidious\.enkirton\.net/|
388 (?:www\.)?invidious\.13ad\.de/|
389 (?:www\.)?invidious\.mastodon\.host/|
390 (?:www\.)?invidious\.nixnet\.xyz/|
391 (?:www\.)?tube\.poal\.co/|
392 (?:www\.)?vid\.wxzm\.sx/|
393 (?:www\.)?yt\.elukerio\.org/|
394 (?:www\.)?kgg2m7yk5aybusll\.onion/|
395 (?:www\.)?qklhadlycap4cnod\.onion/|
396 (?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion/|
397 (?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion/|
398 (?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion/|
399 (?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion/|
400 youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
401 (?:.*?\#/)? # handle anchor (#/) redirect urls
402 (?: # the various things that can precede the ID:
403 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
404 |(?: # or the v= param in all its forms
405 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
406 (?:\?|\#!?) # the params delimiter ? or # or #!
407 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&v=V36LpHqtcDY)
412 youtu\.be| # just youtu.be/xxxx
413 vid\.plus| # or vid.plus/xxxx
414 zwearz\.com/watch| # or zwearz.com/watch/xxxx
416 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
418 )? # all until now is optional -> you can pass the naked ID
419 ([0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
422 %(playlist_id)s| # combined list/video URLs are handled by the playlist IE
423 WL # WL are handled by the watch later IE
426 (?(1).+)? # if we found the ID, everything can follow
427 $""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
428 _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
430 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
431 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
432 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
433 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
434 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
435 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
436 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
437 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
438 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
439 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
440 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
441 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
442 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
443 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
444 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
445 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
446 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
447 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
451 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
452 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
453 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
454 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
455 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
456 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
457 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
459 # Apple HTTP Live Streaming
460 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
461 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
462 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
463 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
464 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
465 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
466 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
467 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
470 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
471 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
472 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
473 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
474 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
475 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
476 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
477 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
478 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
479 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
480 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
481 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
484 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
485 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
486 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
487 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
488 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
489 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
490 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
493 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
494 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
495 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
496 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
497 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
498 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
499 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
500 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
501 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
502 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
503 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
504 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
505 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
506 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
507 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
508 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
509 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
510 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
511 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
512 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
513 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
514 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
517 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
518 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
520 # Dash webm audio with opus inside
521 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
522 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
523 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
526 '_rtmp': {'protocol': 'rtmp'},
528 # av01 video only formats sometimes served with "unknown" codecs
529 '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
530 '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
531 '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
532 '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
534 _SUBTITLE_FORMATS = ('srv1', 'srv2', 'srv3', 'ttml', 'vtt')
541 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
545 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
546 'uploader': 'Philipp Hagemeister',
547 'uploader_id': 'phihag',
548 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
549 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
550 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
551 'upload_date': '20121002',
552 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
553 'categories': ['Science & Technology'],
554 'tags': ['youtube-dl'],
558 'dislike_count': int,
564 'url': 'https://www.youtube.com/watch?v=UxxajLWwzqY',
565 'note': 'Test generic use_cipher_signature video (#897)',
569 'upload_date': '20120506',
570 'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
571 'alt_title': 'I Love It (feat. Charli XCX)',
572 'description': 'md5:f3ceb5ef83a08d95b9d146f973157cc8',
573 'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
574 'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
575 'iconic ep', 'iconic', 'love', 'it'],
577 'uploader': 'Icona Pop',
578 'uploader_id': 'IconaPop',
579 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IconaPop',
580 'creator': 'Icona Pop',
581 'track': 'I Love It (feat. Charli XCX)',
582 'artist': 'Icona Pop',
586 'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ',
587 'note': 'Test VEVO video with age protection (#956)',
591 'upload_date': '20130703',
592 'title': 'Justin Timberlake - Tunnel Vision (Official Music Video) (Explicit)',
593 'alt_title': 'Tunnel Vision',
594 'description': 'md5:07dab3356cde4199048e4c7cd93471e1',
596 'uploader': 'justintimberlakeVEVO',
597 'uploader_id': 'justintimberlakeVEVO',
598 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/justintimberlakeVEVO',
599 'creator': 'Justin Timberlake',
600 'track': 'Tunnel Vision',
601 'artist': 'Justin Timberlake',
606 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
607 'note': 'Embed-only video (#1746)',
611 'upload_date': '20120608',
612 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
613 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
614 'uploader': 'SET India',
615 'uploader_id': 'setindia',
616 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
621 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=UxxajLWwzqY',
622 'note': 'Use the first video ID in the URL',
626 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
627 'uploader': 'Philipp Hagemeister',
628 'uploader_id': 'phihag',
629 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
630 'upload_date': '20121002',
631 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
632 'categories': ['Science & Technology'],
633 'tags': ['youtube-dl'],
637 'dislike_count': int,
640 'skip_download': True,
644 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
645 'note': '256k DASH audio (format 141) via DASH manifest',
649 'upload_date': '20121002',
650 'uploader_id': '8KVIDEO',
651 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
653 'uploader': '8KVIDEO',
654 'title': 'UHDTV TEST 8K VIDEO.mp4'
657 'youtube_include_dash_manifest': True,
660 'skip': 'format 141 not served anymore',
662 # DASH manifest with encrypted signature
664 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
668 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
669 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
671 'uploader': 'AfrojackVEVO',
672 'uploader_id': 'AfrojackVEVO',
673 'upload_date': '20131011',
676 'youtube_include_dash_manifest': True,
677 'format': '141/bestaudio[ext=m4a]',
680 # JS player signature function name containing $
682 'url': 'https://www.youtube.com/watch?v=nfWlot6h_JM',
686 'title': 'Taylor Swift - Shake It Off',
687 'description': 'md5:bec2185232c05479482cb5a9b82719bf',
689 'uploader': 'TaylorSwiftVEVO',
690 'uploader_id': 'TaylorSwiftVEVO',
691 'upload_date': '20140818',
692 'creator': 'Taylor Swift',
695 'youtube_include_dash_manifest': True,
696 'format': '141/bestaudio[ext=m4a]',
701 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
706 'upload_date': '20100909',
707 'uploader': 'Amazing Atheist',
708 'uploader_id': 'TheAmazingAtheist',
709 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
710 'title': 'Burning Everyone\'s Koran',
711 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
714 # Normal age-gate video (No vevo, embed allowed)
716 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
720 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
721 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
723 'uploader': 'The Witcher',
724 'uploader_id': 'WitcherGame',
725 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
726 'upload_date': '20140605',
730 # Age-gate video with encrypted signature
732 'url': 'https://www.youtube.com/watch?v=6kLq3WMV1nU',
736 'title': 'Dedication To My Ex (Miss That) (Lyric Video)',
737 'description': 'md5:33765bb339e1b47e7e72b5490139bb41',
739 'uploader': 'LloydVEVO',
740 'uploader_id': 'LloydVEVO',
741 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/LloydVEVO',
742 'upload_date': '20110629',
746 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
747 # YouTube Red ad is not captured for creator
749 'url': '__2ABJjxzNo',
754 'upload_date': '20100430',
755 'uploader_id': 'deadmau5',
756 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
757 'creator': 'deadmau5',
758 'description': 'md5:12c56784b8032162bb936a5f76d55360',
759 'uploader': 'deadmau5',
760 'title': 'Deadmau5 - Some Chords (HD)',
761 'alt_title': 'Some Chords',
763 'expected_warnings': [
764 'DASH manifest missing',
767 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
769 'url': 'lqQg6PlCWgI',
774 'upload_date': '20150827',
775 'uploader_id': 'olympic',
776 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
777 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
778 'uploader': 'Olympic',
779 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
782 'skip_download': 'requires avconv',
787 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
791 'stretched_ratio': 16 / 9.,
793 'upload_date': '20110310',
794 'uploader_id': 'AllenMeow',
795 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
796 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
798 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
801 # url_encoded_fmt_stream_map is empty string
803 'url': 'qEJwOuvDf7I',
807 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
809 'upload_date': '20150404',
810 'uploader_id': 'spbelect',
811 'uploader': 'Наблюдатели Петербурга',
814 'skip_download': 'requires avconv',
816 'skip': 'This live event has ended.',
818 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
820 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
824 'title': 'md5:7b81415841e02ecd4313668cde88737a',
825 'description': 'md5:116377fd2963b81ec4ce64b542173306',
827 'upload_date': '20150625',
828 'uploader_id': 'dorappi2000',
829 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
830 'uploader': 'dorappi2000',
831 'formats': 'mincount:31',
833 'skip': 'not actual anymore',
835 # DASH manifest with segment_list
837 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
838 'md5': '8ce563a1d667b599d21064e982ab9e31',
842 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
843 'uploader': 'Airtek',
844 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
845 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
846 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
849 'youtube_include_dash_manifest': True,
850 'format': '135', # bestvideo
852 'skip': 'This live event has ended.',
855 # Multifeed videos (multiple cameras), URL is for Main Camera
856 'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
859 'title': 'teamPGP: Rocket League Noob Stream',
860 'description': 'md5:dc7872fb300e143831327f1bae3af010',
866 'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
867 'description': 'md5:dc7872fb300e143831327f1bae3af010',
869 'upload_date': '20150721',
870 'uploader': 'Beer Games Beer',
871 'uploader_id': 'beergamesbeer',
872 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
873 'license': 'Standard YouTube License',
879 'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
880 'description': 'md5:dc7872fb300e143831327f1bae3af010',
882 'upload_date': '20150721',
883 'uploader': 'Beer Games Beer',
884 'uploader_id': 'beergamesbeer',
885 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
886 'license': 'Standard YouTube License',
892 'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
893 'description': 'md5:dc7872fb300e143831327f1bae3af010',
895 'upload_date': '20150721',
896 'uploader': 'Beer Games Beer',
897 'uploader_id': 'beergamesbeer',
898 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
899 'license': 'Standard YouTube License',
905 'title': 'teamPGP: Rocket League Noob Stream (zim)',
906 'description': 'md5:dc7872fb300e143831327f1bae3af010',
908 'upload_date': '20150721',
909 'uploader': 'Beer Games Beer',
910 'uploader_id': 'beergamesbeer',
911 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
912 'license': 'Standard YouTube License',
916 'skip_download': True,
918 'skip': 'This video is not available.',
921 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
922 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
925 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
928 'skip': 'Not multifeed anymore',
931 'url': 'https://vid.plus/FlRa-iH7PGw',
932 'only_matching': True,
935 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
936 'only_matching': True,
939 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
940 # Also tests cut-off URL expansion in video description (see
941 # https://github.com/ytdl-org/youtube-dl/issues/1892,
942 # https://github.com/ytdl-org/youtube-dl/issues/8164)
943 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
947 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
948 'alt_title': 'Dark Walk - Position Music',
949 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
951 'upload_date': '20151119',
952 'uploader_id': 'IronSoulElf',
953 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
954 'uploader': 'IronSoulElf',
955 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
956 'track': 'Dark Walk - Position Music',
957 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
958 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
961 'skip_download': True,
965 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
966 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
967 'only_matching': True,
970 # Video with yt:stretch=17:0
971 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
975 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
976 'description': 'md5:ee18a25c350637c8faff806845bddee9',
977 'upload_date': '20151107',
978 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
979 'uploader': 'CH GAMER DROID',
982 'skip_download': True,
984 'skip': 'This video does not exist.',
987 # Video licensed under Creative Commons
988 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
992 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
993 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
995 'upload_date': '20150127',
996 'uploader_id': 'BerkmanCenter',
997 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
998 'uploader': 'The Berkman Klein Center for Internet & Society',
999 'license': 'Creative Commons Attribution license (reuse allowed)',
1002 'skip_download': True,
1006 # Channel-like uploader_url
1007 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1009 'id': 'eQcmzGIKrzg',
1011 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
1012 'description': 'md5:dda0d780d5a6e120758d1711d062a867',
1014 'upload_date': '20151119',
1015 'uploader': 'Bernie Sanders',
1016 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1017 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
1018 'license': 'Creative Commons Attribution license (reuse allowed)',
1021 'skip_download': True,
1025 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;v=V36LpHqtcDY',
1026 'only_matching': True,
1029 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
1030 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1031 'only_matching': True,
1034 # Rental video preview
1035 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1037 'id': 'uGpuVWrhIzE',
1039 'title': 'Piku - Trailer',
1040 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1041 'upload_date': '20150811',
1042 'uploader': 'FlixMatrix',
1043 'uploader_id': 'FlixMatrixKaravan',
1044 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
1045 'license': 'Standard YouTube License',
1048 'skip_download': True,
1050 'skip': 'This video is not available.',
1053 # YouTube Red video with episode data
1054 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1056 'id': 'iqKdEhx-dD4',
1058 'title': 'Isolation - Mind Field (Ep 1)',
1059 'description': 'md5:46a29be4ceffa65b92d277b93f463c0f',
1061 'upload_date': '20170118',
1062 'uploader': 'Vsauce',
1063 'uploader_id': 'Vsauce',
1064 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
1065 'series': 'Mind Field',
1067 'episode_number': 1,
1070 'skip_download': True,
1072 'expected_warnings': [
1073 'Skipping DASH manifest',
1077 # The following content has been identified by the YouTube community
1078 # as inappropriate or offensive to some audiences.
1079 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1081 'id': '6SJNVb0GnPI',
1083 'title': 'Race Differences in Intelligence',
1084 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1086 'upload_date': '20140124',
1087 'uploader': 'New Century Foundation',
1088 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1089 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1092 'skip_download': True,
1097 'url': '1t24XAntNCY',
1098 'only_matching': True,
1101 # geo restricted to JP
1102 'url': 'sJL6WA-aGkQ',
1103 'only_matching': True,
1106 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
1107 'only_matching': True,
1110 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1111 'only_matching': True,
1115 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1116 'only_matching': True,
1119 # Video with unsupported adaptive stream type formats
1120 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1122 'id': 'Z4Vy8R84T1U',
1124 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1125 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1127 'upload_date': '20130923',
1128 'uploader': 'Amelia Putri Harwita',
1129 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1130 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1131 'formats': 'maxcount:10',
1134 'skip_download': True,
1135 'youtube_include_dash_manifest': False,
1139 # Youtube Music Auto-generated description
1140 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1142 'id': 'MgNrAu2pzNs',
1144 'title': 'Voyeur Girl',
1145 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1146 'upload_date': '20190312',
1147 'uploader': 'Various Artists - Topic',
1148 'uploader_id': 'UCVWKBi1ELZn0QX2CBLSkiyw',
1149 'artist': 'Stephen',
1150 'track': 'Voyeur Girl',
1151 'album': 'it\'s too much love to know my dear',
1152 'release_date': '20190313',
1153 'release_year': 2019,
1156 'skip_download': True,
1160 # Youtube Music Auto-generated description
1161 # Retrieve 'artist' field from 'Artist:' in video description
1162 # when it is present on youtube music video
1163 'url': 'https://www.youtube.com/watch?v=k0jLE7tTwjY',
1165 'id': 'k0jLE7tTwjY',
1167 'title': 'Latch Feat. Sam Smith',
1168 'description': 'md5:3cb1e8101a7c85fcba9b4fb41b951335',
1169 'upload_date': '20150110',
1170 'uploader': 'Various Artists - Topic',
1171 'uploader_id': 'UCNkEcmYdjrH4RqtNgh7BZ9w',
1172 'artist': 'Disclosure',
1173 'track': 'Latch Feat. Sam Smith',
1174 'album': 'Latch Featuring Sam Smith',
1175 'release_date': '20121008',
1176 'release_year': 2012,
1179 'skip_download': True,
1183 # Youtube Music Auto-generated description
1184 # handle multiple artists on youtube music video
1185 'url': 'https://www.youtube.com/watch?v=74qn0eJSjpA',
1187 'id': '74qn0eJSjpA',
1189 'title': 'Eastside',
1190 'description': 'md5:290516bb73dcbfab0dcc4efe6c3de5f2',
1191 'upload_date': '20180710',
1192 'uploader': 'Benny Blanco - Topic',
1193 'uploader_id': 'UCzqz_ksRu_WkIzmivMdIS7A',
1194 'artist': 'benny blanco, Halsey, Khalid',
1195 'track': 'Eastside',
1196 'album': 'Eastside',
1197 'release_date': '20180713',
1198 'release_year': 2018,
1201 'skip_download': True,
1205 # Youtube Music Auto-generated description
1206 # handle youtube music video with release_year and no release_date
1207 'url': 'https://www.youtube.com/watch?v=-hcAI0g-f5M',
1209 'id': '-hcAI0g-f5M',
1211 'title': 'Put It On Me',
1212 'description': 'md5:93c55acc682ae7b0c668f2e34e1c069e',
1213 'upload_date': '20180426',
1214 'uploader': 'Matt Maeson - Topic',
1215 'uploader_id': 'UCnEkIGqtGcQMLk73Kp-Q5LQ',
1216 'artist': 'Matt Maeson',
1217 'track': 'Put It On Me',
1218 'album': 'The Hearse',
1219 'release_date': None,
1220 'release_year': 2018,
1223 'skip_download': True,
    def __init__(self, *args, **kwargs):
        super(YoutubeIE, self).__init__(*args, **kwargs)
        # Cache of extracted signature-decryption functions, keyed by
        # (player_url, signature cache id); populated and read by
        # _decrypt_signature to avoid re-downloading/re-parsing the player.
        self._player_cache = {}
1232 def report_video_info_webpage_download(self, video_id):
1233 """Report attempt to download video info webpage."""
1234 self.to_screen('%s: Downloading video info webpage' % video_id)
1236 def report_information_extraction(self, video_id):
1237 """Report attempt to extract video information."""
1238 self.to_screen('%s: Extracting video information' % video_id)
1240 def report_unavailable_format(self, video_id, format):
1241 """Report extracted video URL."""
1242 self.to_screen('%s: Format %s not available' % (video_id, format))
1244 def report_rtmp_download(self):
1245 """Indicate the download will use the RTMP protocol."""
1246 self.to_screen('RTMP download detected')
1248 def _signature_cache_id(self, example_sig):
1249 """ Return a string representation of a signature """
1250 return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
1252 def _extract_signature_function(self, video_id, player_url, example_sig):
1254 r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|(?:/[a-z]{2,3}_[A-Z]{2})?/base)?\.(?P<ext>[a-z]+)$',
1257 raise ExtractorError('Cannot identify player %r' % player_url)
1258 player_type = id_m.group('ext')
1259 player_id = id_m.group('id')
1261 # Read from filesystem cache
1262 func_id = '%s_%s_%s' % (
1263 player_type, player_id, self._signature_cache_id(example_sig))
1264 assert os.path.basename(func_id) == func_id
1266 cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
1267 if cache_spec is not None:
1268 return lambda s: ''.join(s[i] for i in cache_spec)
1271 'Downloading player %s' % player_url
1272 if self._downloader.params.get('verbose') else
1273 'Downloading %s player %s' % (player_type, player_id)
1275 if player_type == 'js':
1276 code = self._download_webpage(
1277 player_url, video_id,
1279 errnote='Download of %s failed' % player_url)
1280 res = self._parse_sig_js(code)
1281 elif player_type == 'swf':
1282 urlh = self._request_webpage(
1283 player_url, video_id,
1285 errnote='Download of %s failed' % player_url)
1287 res = self._parse_sig_swf(code)
1289 assert False, 'Invalid player type %r' % player_type
1291 test_string = ''.join(map(compat_chr, range(len(example_sig))))
1292 cache_res = res(test_string)
1293 cache_spec = [ord(c) for c in cache_res]
1295 self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
1298 def _print_sig_code(self, func, example_sig):
1299 def gen_sig_code(idxs):
1300 def _genslice(start, end, step):
1301 starts = '' if start == 0 else str(start)
1302 ends = (':%d' % (end + step)) if end + step >= 0 else ':'
1303 steps = '' if step == 1 else (':%d' % step)
1304 return 's[%s%s%s]' % (starts, ends, steps)
1307 # Quelch pyflakes warnings - start will be set when step is set
1308 start = '(Never used)'
1309 for i, prev in zip(idxs[1:], idxs[:-1]):
1310 if step is not None:
1311 if i - prev == step:
1313 yield _genslice(start, prev, step)
1316 if i - prev in [-1, 1]:
1321 yield 's[%d]' % prev
1325 yield _genslice(start, i, step)
1327 test_string = ''.join(map(compat_chr, range(len(example_sig))))
1328 cache_res = func(test_string)
1329 cache_spec = [ord(c) for c in cache_res]
1330 expr_code = ' + '.join(gen_sig_code(cache_spec))
1331 signature_id_tuple = '(%s)' % (
1332 ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
1333 code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
1334 ' return %s\n') % (signature_id_tuple, expr_code)
1335 self.to_screen('Extracted signature function:\n' + code)
1337 def _parse_sig_js(self, jscode):
1338 funcname = self._search_regex(
1339 (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1340 r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1341 r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
1343 r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1344 r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
1345 r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1346 r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1347 r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1348 r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1349 r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1350 r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
1351 jscode, 'Initial JS player signature function name', group='sig')
1353 jsi = JSInterpreter(jscode)
1354 initial_function = jsi.extract_function(funcname)
1355 return lambda s: initial_function([s])
1357 def _parse_sig_swf(self, file_contents):
1358 swfi = SWFInterpreter(file_contents)
1359 TARGET_CLASSNAME = 'SignatureDecipher'
1360 searched_class = swfi.extract_class(TARGET_CLASSNAME)
1361 initial_function = swfi.extract_function(searched_class, 'decipher')
1362 return lambda s: initial_function([s])
1364 def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
1365 """Turn the encrypted s field into a working signature"""
1367 if player_url is None:
1368 raise ExtractorError('Cannot decrypt signature without player_url')
1370 if player_url.startswith('//'):
1371 player_url = 'https:' + player_url
1372 elif not re.match(r'https?://', player_url):
1373 player_url = compat_urlparse.urljoin(
1374 'https://www.youtube.com', player_url)
1376 player_id = (player_url, self._signature_cache_id(s))
1377 if player_id not in self._player_cache:
1378 func = self._extract_signature_function(
1379 video_id, player_url, s
1381 self._player_cache[player_id] = func
1382 func = self._player_cache[player_id]
1383 if self._downloader.params.get('youtube_print_sig_code'):
1384 self._print_sig_code(func, s)
1386 except Exception as e:
1387 tb = traceback.format_exc()
1388 raise ExtractorError(
1389 'Signature extraction failed: ' + tb, cause=e)
1391 def _get_subtitles(self, video_id, webpage):
1393 subs_doc = self._download_xml(
1394 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
1395 video_id, note=False)
1396 except ExtractorError as err:
1397 self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err))
1401 for track in subs_doc.findall('track'):
1402 lang = track.attrib['lang_code']
1403 if lang in sub_lang_list:
1406 for ext in self._SUBTITLE_FORMATS:
1407 params = compat_urllib_parse_urlencode({
1411 'name': track.attrib['name'].encode('utf-8'),
1413 sub_formats.append({
1414 'url': 'https://www.youtube.com/api/timedtext?' + params,
1417 sub_lang_list[lang] = sub_formats
1418 if not sub_lang_list:
1419 self._downloader.report_warning('video doesn\'t have subtitles')
1421 return sub_lang_list
1423 def _get_ytplayer_config(self, video_id, webpage):
1425 # User data may contain arbitrary character sequences that may affect
1426 # JSON extraction with regex, e.g. when '};' is contained the second
1427 # regex won't capture the whole JSON. Yet working around by trying more
1428 # concrete regex first keeping in mind proper quoted string handling
1429 # to be implemented in future that will replace this workaround (see
1430 # https://github.com/ytdl-org/youtube-dl/issues/7468,
1431 # https://github.com/ytdl-org/youtube-dl/pull/7599)
1432 r';ytplayer\.config\s*=\s*({.+?});ytplayer',
1433 r';ytplayer\.config\s*=\s*({.+?});',
1435 config = self._search_regex(
1436 patterns, webpage, 'ytplayer.config', default=None)
1438 return self._parse_json(
1439 uppercase_escape(config), video_id, fatal=False)
1441 def _get_automatic_captions(self, video_id, webpage):
1442 """We need the webpage for getting the captions url, pass it as an
1443 argument to speed up the process."""
1444 self.to_screen('%s: Looking for automatic captions' % video_id)
1445 player_config = self._get_ytplayer_config(video_id, webpage)
1446 err_msg = 'Couldn\'t find automatic captions for %s' % video_id
1447 if not player_config:
1448 self._downloader.report_warning(err_msg)
1451 args = player_config['args']
1452 caption_url = args.get('ttsurl')
1454 timestamp = args['timestamp']
1455 # We get the available subtitles
1456 list_params = compat_urllib_parse_urlencode({
1461 list_url = caption_url + '&' + list_params
1462 caption_list = self._download_xml(list_url, video_id)
1463 original_lang_node = caption_list.find('track')
1464 if original_lang_node is None:
1465 self._downloader.report_warning('Video doesn\'t have automatic captions')
1467 original_lang = original_lang_node.attrib['lang_code']
1468 caption_kind = original_lang_node.attrib.get('kind', '')
1471 for lang_node in caption_list.findall('target'):
1472 sub_lang = lang_node.attrib['lang_code']
1474 for ext in self._SUBTITLE_FORMATS:
1475 params = compat_urllib_parse_urlencode({
1476 'lang': original_lang,
1480 'kind': caption_kind,
1482 sub_formats.append({
1483 'url': caption_url + '&' + params,
1486 sub_lang_list[sub_lang] = sub_formats
1487 return sub_lang_list
1489 def make_captions(sub_url, sub_langs):
1490 parsed_sub_url = compat_urllib_parse_urlparse(sub_url)
1491 caption_qs = compat_parse_qs(parsed_sub_url.query)
1493 for sub_lang in sub_langs:
1495 for ext in self._SUBTITLE_FORMATS:
1497 'tlang': [sub_lang],
1500 sub_url = compat_urlparse.urlunparse(parsed_sub_url._replace(
1501 query=compat_urllib_parse_urlencode(caption_qs, True)))
1502 sub_formats.append({
1506 captions[sub_lang] = sub_formats
1509 # New captions format as of 22.06.2017
1510 player_response = args.get('player_response')
1511 if player_response and isinstance(player_response, compat_str):
1512 player_response = self._parse_json(
1513 player_response, video_id, fatal=False)
1515 renderer = player_response['captions']['playerCaptionsTracklistRenderer']
1516 base_url = renderer['captionTracks'][0]['baseUrl']
1518 for lang in renderer['translationLanguages']:
1519 lang_code = lang.get('languageCode')
1521 sub_lang_list.append(lang_code)
1522 return make_captions(base_url, sub_lang_list)
1524 # Some videos don't provide ttsurl but rather caption_tracks and
1525 # caption_translation_languages (e.g. 20LmZk1hakA)
1526 # Does not used anymore as of 22.06.2017
1527 caption_tracks = args['caption_tracks']
1528 caption_translation_languages = args['caption_translation_languages']
1529 caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
1531 for lang in caption_translation_languages.split(','):
1532 lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang))
1533 sub_lang = lang_qs.get('lc', [None])[0]
1535 sub_lang_list.append(sub_lang)
1536 return make_captions(caption_url, sub_lang_list)
1537 # An extractor error can be raise by the download process if there are
1538 # no automatic captions but there are subtitles
1539 except (KeyError, IndexError, ExtractorError):
1540 self._downloader.report_warning(err_msg)
1543 def _mark_watched(self, video_id, video_info, player_response):
1544 playback_url = url_or_none(try_get(
1546 lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']) or try_get(
1547 video_info, lambda x: x['videostats_playback_base_url'][0]))
1548 if not playback_url:
1550 parsed_playback_url = compat_urlparse.urlparse(playback_url)
1551 qs = compat_urlparse.parse_qs(parsed_playback_url.query)
1553 # cpn generation algorithm is reverse engineered from base.js.
1554 # In fact it works even with dummy cpn.
1555 CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
1556 cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
1562 playback_url = compat_urlparse.urlunparse(
1563 parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
1565 self._download_webpage(
1566 playback_url, video_id, 'Marking watched',
1567 'Unable to mark watched', fatal=False)
1570 def _extract_urls(webpage):
1571 # Embedded YouTube player
1573 unescapeHTML(mobj.group('url'))
1574 for mobj in re.finditer(r'''(?x)
1584 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1585 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
1588 # lazyYT YouTube embed
1589 entries.extend(list(map(
1591 re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))
1593 # Wordpress "YouTube Video Importer" plugin
1594 matches = re.findall(r'''(?x)<div[^>]+
1595 class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
1596 data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
1597 entries.extend(m[-1] for m in matches)
1602 def _extract_url(webpage):
1603 urls = YoutubeIE._extract_urls(webpage)
1604 return urls[0] if urls else None
1607 def extract_id(cls, url):
1608 mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
1610 raise ExtractorError('Invalid URL: %s' % url)
1611 video_id = mobj.group(2)
1615 def _extract_chapters(description, duration):
1618 chapter_lines = re.findall(
1619 r'(?:^|<br\s*/>)([^<]*<a[^>]+onclick=["\']yt\.www\.watch\.player\.seekTo[^>]+>(\d{1,2}:\d{1,2}(?::\d{1,2})?)</a>[^>]*)(?=$|<br\s*/>)',
1621 if not chapter_lines:
1624 for next_num, (chapter_line, time_point) in enumerate(
1625 chapter_lines, start=1):
1626 start_time = parse_duration(time_point)
1627 if start_time is None:
1629 if start_time > duration:
1631 end_time = (duration if next_num == len(chapter_lines)
1632 else parse_duration(chapter_lines[next_num][1]))
1633 if end_time is None:
1635 if end_time > duration:
1637 if start_time > end_time:
1639 chapter_title = re.sub(
1640 r'<a[^>]+>[^<]+</a>', '', chapter_line).strip(' \t-')
1641 chapter_title = re.sub(r'\s+', ' ', chapter_title)
1643 'start_time': start_time,
1644 'end_time': end_time,
1645 'title': chapter_title,
1649 def _real_extract(self, url):
1650 url, smuggled_data = unsmuggle_url(url, {})
1653 'http' if self._downloader.params.get('prefer_insecure', False)
1658 parsed_url = compat_urllib_parse_urlparse(url)
1659 for component in [parsed_url.fragment, parsed_url.query]:
1660 query = compat_parse_qs(component)
1661 if start_time is None and 't' in query:
1662 start_time = parse_duration(query['t'][0])
1663 if start_time is None and 'start' in query:
1664 start_time = parse_duration(query['start'][0])
1665 if end_time is None and 'end' in query:
1666 end_time = parse_duration(query['end'][0])
1668 # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1669 mobj = re.search(self._NEXT_URL_RE, url)
1671 url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
1672 video_id = self.extract_id(url)
1675 url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
1676 video_webpage = self._download_webpage(url, video_id)
1678 # Attempt to extract SWF player URL
1679 mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
1680 if mobj is not None:
1681 player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1687 def add_dash_mpd(video_info):
1688 dash_mpd = video_info.get('dashmpd')
1689 if dash_mpd and dash_mpd[0] not in dash_mpds:
1690 dash_mpds.append(dash_mpd[0])
1692 def add_dash_mpd_pr(pl_response):
1693 dash_mpd = url_or_none(try_get(
1694 pl_response, lambda x: x['streamingData']['dashManifestUrl'],
1696 if dash_mpd and dash_mpd not in dash_mpds:
1697 dash_mpds.append(dash_mpd)
1702 def extract_view_count(v_info):
1703 return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
1705 def extract_token(v_info):
1706 return dict_get(v_info, ('account_playback_token', 'accountPlaybackToken', 'token'))
1708 def extract_player_response(player_response, video_id):
1709 pl_response = str_or_none(player_response)
1712 pl_response = self._parse_json(pl_response, video_id, fatal=False)
1713 if isinstance(pl_response, dict):
1714 add_dash_mpd_pr(pl_response)
1717 player_response = {}
1720 embed_webpage = None
1721 if re.search(r'player-age-gate-content">', video_webpage) is not None:
1723 # We simulate the access to the video from www.youtube.com/v/{video_id}
1724 # this can be viewed without login into Youtube
1725 url = proto + '://www.youtube.com/embed/%s' % video_id
1726 embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
1727 data = compat_urllib_parse_urlencode({
1728 'video_id': video_id,
1729 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1730 'sts': self._search_regex(
1731 r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
1733 video_info_url = proto + '://www.youtube.com/get_video_info?' + data
1734 video_info_webpage = self._download_webpage(
1735 video_info_url, video_id,
1736 note='Refetching age-gated info webpage',
1737 errnote='unable to download video info webpage')
1738 video_info = compat_parse_qs(video_info_webpage)
1739 pl_response = video_info.get('player_response', [None])[0]
1740 player_response = extract_player_response(pl_response, video_id)
1741 add_dash_mpd(video_info)
1742 view_count = extract_view_count(video_info)
1747 # Try looking directly into the video webpage
1748 ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
1750 args = ytplayer_config['args']
1751 if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
1752 # Convert to the same format returned by compat_parse_qs
1753 video_info = dict((k, [v]) for k, v in args.items())
1754 add_dash_mpd(video_info)
1755 # Rental video is not rented but preview is available (e.g.
1756 # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
1757 # https://github.com/ytdl-org/youtube-dl/issues/10532)
1758 if not video_info and args.get('ypc_vid'):
1759 return self.url_result(
1760 args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
1761 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1763 sts = ytplayer_config.get('sts')
1764 if not player_response:
1765 player_response = extract_player_response(args.get('player_response'), video_id)
1766 if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
1767 add_dash_mpd_pr(player_response)
1768 # We also try looking in get_video_info since it may contain different dashmpd
1769 # URL that points to a DASH manifest with possibly different itag set (some itags
1770 # are missing from DASH manifest pointed by webpage's dashmpd, some - from DASH
1771 # manifest pointed by get_video_info's dashmpd).
1772 # The general idea is to take a union of itags of both DASH manifests (for example
1773 # video with such 'manifest behavior' see https://github.com/ytdl-org/youtube-dl/issues/6093)
1774 self.report_video_info_webpage_download(video_id)
1775 for el in ('embedded', 'detailpage', 'vevo', ''):
1777 'video_id': video_id,
1787 video_info_webpage = self._download_webpage(
1788 '%s://www.youtube.com/get_video_info' % proto,
1789 video_id, note=False,
1790 errnote='unable to download video info webpage',
1791 fatal=False, query=query)
1792 if not video_info_webpage:
1794 get_video_info = compat_parse_qs(video_info_webpage)
1795 if not player_response:
1796 pl_response = get_video_info.get('player_response', [None])[0]
1797 player_response = extract_player_response(pl_response, video_id)
1798 add_dash_mpd(get_video_info)
1799 if view_count is None:
1800 view_count = extract_view_count(get_video_info)
1802 video_info = get_video_info
1803 get_token = extract_token(get_video_info)
1805 # Different get_video_info requests may report different results, e.g.
1806 # some may report video unavailability, but some may serve it without
1807 # any complaint (see https://github.com/ytdl-org/youtube-dl/issues/7362,
1808 # the original webpage as well as el=info and el=embedded get_video_info
1809 # requests report video unavailability due to geo restriction while
1810 # el=detailpage succeeds and returns valid data). This is probably
1811 # due to YouTube measures against IP ranges of hosting providers.
1812 # Working around by preferring the first succeeded video_info containing
1813 # the token if no such video_info yet was found.
1814 token = extract_token(video_info)
1816 video_info = get_video_info
1819 def extract_unavailable_message():
1821 for tag, kind in (('h1', 'message'), ('div', 'submessage')):
1822 msg = self._html_search_regex(
1823 r'(?s)<{tag}[^>]+id=["\']unavailable-{kind}["\'][^>]*>(.+?)</{tag}>'.format(tag=tag, kind=kind),
1824 video_webpage, 'unavailable %s' % kind, default=None)
1826 messages.append(msg)
1828 return '\n'.join(messages)
1831 unavailable_message = extract_unavailable_message()
1832 if not unavailable_message:
1833 unavailable_message = 'Unable to extract video data'
1834 raise ExtractorError(
1835 'YouTube said: %s' % unavailable_message, expected=True, video_id=video_id)
1837 video_details = try_get(
1838 player_response, lambda x: x['videoDetails'], dict) or {}
1840 video_title = video_info.get('title', [None])[0] or video_details.get('title')
1842 self._downloader.report_warning('Unable to extract video title')
1845 description_original = video_description = get_element_by_id("eow-description", video_webpage)
1846 if video_description:
1849 redir_url = compat_urlparse.urljoin(url, m.group(1))
1850 parsed_redir_url = compat_urllib_parse_urlparse(redir_url)
1851 if re.search(r'^(?:www\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)$', parsed_redir_url.netloc) and parsed_redir_url.path == '/redirect':
1852 qs = compat_parse_qs(parsed_redir_url.query)
1858 description_original = video_description = re.sub(r'''(?x)
1860 (?:[a-zA-Z-]+="[^"]*"\s+)*?
1861 (?:title|href)="([^"]+)"\s+
1862 (?:[a-zA-Z-]+="[^"]*"\s+)*?
1866 ''', replace_url, video_description)
1867 video_description = clean_html(video_description)
1869 video_description = self._html_search_meta('description', video_webpage) or video_details.get('shortDescription')
1871 if not smuggled_data.get('force_singlefeed', False):
1872 if not self._downloader.params.get('noplaylist'):
1873 multifeed_metadata_list = try_get(
1875 lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
1876 compat_str) or try_get(
1877 video_info, lambda x: x['multifeed_metadata_list'][0], compat_str)
1878 if multifeed_metadata_list:
1881 for feed in multifeed_metadata_list.split(','):
1882 # Unquote should take place before split on comma (,) since textual
1883 # fields may contain comma as well (see
1884 # https://github.com/ytdl-org/youtube-dl/issues/8536)
1885 feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
1887 '_type': 'url_transparent',
1888 'ie_key': 'Youtube',
1890 '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
1891 {'force_singlefeed': True}),
1892 'title': '%s (%s)' % (video_title, feed_data['title'][0]),
1894 feed_ids.append(feed_data['id'][0])
1896 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
1897 % (', '.join(feed_ids), video_id))
1898 return self.playlist_result(entries, video_id, video_title, video_description)
1900 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
1902 if view_count is None:
1903 view_count = extract_view_count(video_info)
1904 if view_count is None and video_details:
1905 view_count = int_or_none(video_details.get('viewCount'))
1908 is_live = bool_or_none(video_details.get('isLive'))
1910 # Check for "rental" videos
1911 if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
1912 raise ExtractorError('"rental" videos not supported. See https://github.com/ytdl-org/youtube-dl/issues/359 for more information.', expected=True)
1914 def _extract_filesize(media_url):
1915 return int_or_none(self._search_regex(
1916 r'\bclen[=/](\d+)', media_url, 'filesize', default=None))
1918 if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1919 self.report_rtmp_download()
1921 'format_id': '_rtmp',
1923 'url': video_info['conn'][0],
1924 'player_url': player_url,
1926 elif not is_live and (len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1):
1927 encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
1928 if 'rtmpe%3Dyes' in encoded_url_map:
1929 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/ytdl-org/youtube-dl/issues/343 for more information.', expected=True)
1931 fmt_list = video_info.get('fmt_list', [''])[0]
1933 for fmt in fmt_list.split(','):
1934 spec = fmt.split('/')
1936 width_height = spec[1].split('x')
1937 if len(width_height) == 2:
1938 formats_spec[spec[0]] = {
1939 'resolution': spec[1],
1940 'width': int_or_none(width_height[0]),
1941 'height': int_or_none(width_height[1]),
1943 q = qualities(['small', 'medium', 'hd720'])
1944 streaming_formats = try_get(player_response, lambda x: x['streamingData']['formats'], list)
1945 if streaming_formats:
1946 for fmt in streaming_formats:
1947 itag = str_or_none(fmt.get('itag'))
1950 quality = fmt.get('quality')
1951 quality_label = fmt.get('qualityLabel') or quality
1952 formats_spec[itag] = {
1953 'asr': int_or_none(fmt.get('audioSampleRate')),
1954 'filesize': int_or_none(fmt.get('contentLength')),
1955 'format_note': quality_label,
1956 'fps': int_or_none(fmt.get('fps')),
1957 'height': int_or_none(fmt.get('height')),
1958 'quality': q(quality),
1959 # bitrate for itag 43 is always 2147483647
1960 'tbr': float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000) if itag != '43' else None,
1961 'width': int_or_none(fmt.get('width')),
1964 for url_data_str in encoded_url_map.split(','):
1965 url_data = compat_parse_qs(url_data_str)
1966 if 'itag' not in url_data or 'url' not in url_data or url_data.get('drm_families'):
1968 stream_type = int_or_none(try_get(url_data, lambda x: x['stream_type'][0]))
1969 # Unsupported FORMAT_STREAM_TYPE_OTF
1970 if stream_type == 3:
1972 format_id = url_data['itag'][0]
1973 url = url_data['url'][0]
1975 if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
1976 ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
1977 jsplayer_url_json = self._search_regex(
1979 embed_webpage if age_gate else video_webpage,
1980 'JS player URL (1)', default=None)
1981 if not jsplayer_url_json and not age_gate:
1982 # We need the embed website after all
1983 if embed_webpage is None:
1984 embed_url = proto + '://www.youtube.com/embed/%s' % video_id
1985 embed_webpage = self._download_webpage(
1986 embed_url, video_id, 'Downloading embed webpage')
1987 jsplayer_url_json = self._search_regex(
1988 ASSETS_RE, embed_webpage, 'JS player URL')
1990 player_url = json.loads(jsplayer_url_json)
1991 if player_url is None:
1992 player_url_json = self._search_regex(
1993 r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
1994 video_webpage, 'age gate player URL')
1995 player_url = json.loads(player_url_json)
1997 if 'sig' in url_data:
1998 url += '&signature=' + url_data['sig'][0]
1999 elif 's' in url_data:
2000 encrypted_sig = url_data['s'][0]
2002 if self._downloader.params.get('verbose'):
2003 if player_url is None:
2004 player_version = 'unknown'
2005 player_desc = 'unknown'
2007 if player_url.endswith('swf'):
2008 player_version = self._search_regex(
2009 r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
2010 'flash player', fatal=False)
2011 player_desc = 'flash player %s' % player_version
2013 player_version = self._search_regex(
2014 [r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js',
2015 r'(?:www|player(?:_ias)?)-([^/]+)(?:/[a-z]{2,3}_[A-Z]{2})?/base\.js'],
2017 'html5 player', fatal=False)
2018 player_desc = 'html5 player %s' % player_version
2020 parts_sizes = self._signature_cache_id(encrypted_sig)
2021 self.to_screen('{%s} signature length %s, %s' %
2022 (format_id, parts_sizes, player_desc))
2024 signature = self._decrypt_signature(
2025 encrypted_sig, video_id, player_url, age_gate)
2026 sp = try_get(url_data, lambda x: x['sp'][0], compat_str) or 'signature'
2027 url += '&%s=%s' % (sp, signature)
2028 if 'ratebypass' not in url:
2029 url += '&ratebypass=yes'
2032 'format_id': format_id,
2034 'player_url': player_url,
2036 if format_id in self._formats:
2037 dct.update(self._formats[format_id])
2038 if format_id in formats_spec:
2039 dct.update(formats_spec[format_id])
2041 # Some itags are not included in DASH manifest thus corresponding formats will
2042 # lack metadata (see https://github.com/ytdl-org/youtube-dl/pull/5993).
2043 # Trying to extract metadata from url_encoded_fmt_stream_map entry.
2044 mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
2045 width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
2047 filesize = int_or_none(url_data.get(
2048 'clen', [None])[0]) or _extract_filesize(url)
2050 quality = url_data.get('quality', [None])[0]
2053 'filesize': filesize,
2054 'tbr': float_or_none(url_data.get('bitrate', [None])[0], 1000),
2057 'fps': int_or_none(url_data.get('fps', [None])[0]),
2058 'format_note': url_data.get('quality_label', [None])[0] or quality,
2059 'quality': q(quality),
2061 for key, value in more_fields.items():
2064 type_ = url_data.get('type', [None])[0]
2066 type_split = type_.split(';')
2067 kind_ext = type_split[0].split('/')
2068 if len(kind_ext) == 2:
2070 dct['ext'] = mimetype2ext(type_split[0])
2071 if kind in ('audio', 'video'):
2073 for mobj in re.finditer(
2074 r'(?P<key>[a-zA-Z_-]+)=(?P<quote>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):
2075 if mobj.group('key') == 'codecs':
2076 codecs = mobj.group('val')
2079 dct.update(parse_codecs(codecs))
2080 if dct.get('acodec') == 'none' or dct.get('vcodec') == 'none':
2081 dct['downloader_options'] = {
2082 # Youtube throttles chunks >~10M
2083 'http_chunk_size': 10485760,
2088 url_or_none(try_get(
2090 lambda x: x['streamingData']['hlsManifestUrl'],
2092 or url_or_none(try_get(
2093 video_info, lambda x: x['hlsvp'][0], compat_str)))
2096 m3u8_formats = self._extract_m3u8_formats(
2097 manifest_url, video_id, 'mp4', fatal=False)
2098 for a_format in m3u8_formats:
2099 itag = self._search_regex(
2100 r'/itag/(\d+)/', a_format['url'], 'itag', default=None)
2102 a_format['format_id'] = itag
2103 if itag in self._formats:
2104 dct = self._formats[itag].copy()
2105 dct.update(a_format)
2107 a_format['player_url'] = player_url
2108 # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
2109 a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
2110 formats.append(a_format)
2112 error_message = extract_unavailable_message()
2113 if not error_message:
2114 error_message = clean_html(try_get(
2115 player_response, lambda x: x['playabilityStatus']['reason'],
2117 if not error_message:
2118 error_message = clean_html(
2119 try_get(video_info, lambda x: x['reason'][0], compat_str))
2121 raise ExtractorError(error_message, expected=True)
2122 raise ExtractorError('no conn, hlsvp, hlsManifestUrl or url_encoded_fmt_stream_map information found in video info')
2125 video_uploader = try_get(
2126 video_info, lambda x: x['author'][0],
2127 compat_str) or str_or_none(video_details.get('author'))
2129 video_uploader = compat_urllib_parse_unquote_plus(video_uploader)
2131 self._downloader.report_warning('unable to extract uploader name')
2134 video_uploader_id = None
2135 video_uploader_url = None
2137 r'<link itemprop="url" href="(?P<uploader_url>https?://www\.youtube\.com/(?:user|channel)/(?P<uploader_id>[^"]+))">',
2139 if mobj is not None:
2140 video_uploader_id = mobj.group('uploader_id')
2141 video_uploader_url = mobj.group('uploader_url')
2143 self._downloader.report_warning('unable to extract uploader nickname')
2146 str_or_none(video_details.get('channelId'))
2147 or self._html_search_meta(
2148 'channelId', video_webpage, 'channel id', default=None)
2149 or self._search_regex(
2150 r'data-channel-external-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
2151 video_webpage, 'channel id', default=None, group='id'))
2152 channel_url = 'http://www.youtube.com/channel/%s' % channel_id if channel_id else None
2155 # We try first to get a high quality image:
2156 m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
2157 video_webpage, re.DOTALL)
2158 if m_thumb is not None:
2159 video_thumbnail = m_thumb.group(1)
2160 elif 'thumbnail_url' not in video_info:
2161 self._downloader.report_warning('unable to extract video thumbnail')
2162 video_thumbnail = None
2163 else: # don't panic if we can't find it
2164 video_thumbnail = compat_urllib_parse_unquote_plus(video_info['thumbnail_url'][0])
2167 upload_date = self._html_search_meta(
2168 'datePublished', video_webpage, 'upload date', default=None)
2170 upload_date = self._search_regex(
2171 [r'(?s)id="eow-date.*?>(.*?)</span>',
2172 r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'],
2173 video_webpage, 'upload date', default=None)
2174 upload_date = unified_strdate(upload_date)
2176 video_license = self._html_search_regex(
2177 r'<h4[^>]+class="title"[^>]*>\s*License\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li',
2178 video_webpage, 'license', default=None)
2180 m_music = re.search(
2182 <h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*
2190 \bhref=["\']/red[^>]*>| # drop possible
2191 >\s*Listen ad-free with YouTube Red # YouTube Red ad
2198 video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
2199 video_creator = clean_html(m_music.group('creator'))
2201 video_alt_title = video_creator = None
2203 def extract_meta(field):
2204 return self._html_search_regex(
2205 r'<h4[^>]+class="title"[^>]*>\s*%s\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li>\s*' % field,
2206 video_webpage, field, default=None)
2208 track = extract_meta('Song')
2209 artist = extract_meta('Artist')
2210 album = extract_meta('Album')
2212 # Youtube Music Auto-generated description
2213 release_date = release_year = None
2214 if video_description:
2215 mobj = re.search(r'(?s)Provided to YouTube by [^\n]+\n+(?P<track>[^·]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?', video_description)
2218 track = mobj.group('track').strip()
2220 artist = mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·'))
2222 album = mobj.group('album'.strip())
2223 release_year = mobj.group('release_year')
2224 release_date = mobj.group('release_date')
2226 release_date = release_date.replace('-', '')
2227 if not release_year:
2228 release_year = int(release_date[:4])
2230 release_year = int(release_year)
2232 m_episode = re.search(
2233 r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
2236 series = unescapeHTML(m_episode.group('series'))
2237 season_number = int(m_episode.group('season'))
2238 episode_number = int(m_episode.group('episode'))
2240 series = season_number = episode_number = None
2242 m_cat_container = self._search_regex(
2243 r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
2244 video_webpage, 'categories', default=None)
2246 category = self._html_search_regex(
2247 r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
2249 video_categories = None if category is None else [category]
2251 video_categories = None
2254 unescapeHTML(m.group('content'))
2255 for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
2257 def _extract_count(count_name):
2258 return str_to_int(self._search_regex(
2259 r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>'
2260 % re.escape(count_name),
2261 video_webpage, count_name, default=None))
2263 like_count = _extract_count('like')
2264 dislike_count = _extract_count('dislike')
2266 if view_count is None:
2267 view_count = str_to_int(self._search_regex(
2268 r'<[^>]+class=["\']watch-view-count[^>]+>\s*([\d,\s]+)', video_webpage,
2269 'view count', default=None))
2272 float_or_none(video_details.get('averageRating'))
2273 or try_get(video_info, lambda x: float_or_none(x['avg_rating'][0])))
2276 video_subtitles = self.extract_subtitles(video_id, video_webpage)
2277 automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
2279 video_duration = try_get(
2280 video_info, lambda x: int_or_none(x['length_seconds'][0]))
2281 if not video_duration:
2282 video_duration = int_or_none(video_details.get('lengthSeconds'))
2283 if not video_duration:
2284 video_duration = parse_duration(self._html_search_meta(
2285 'duration', video_webpage, 'video duration'))
2288 video_annotations = None
2289 if self._downloader.params.get('writeannotations', False):
2290 xsrf_token = self._search_regex(
2291 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>[A-Za-z0-9+/=]+)\2',
2292 video_webpage, 'xsrf token', group='xsrf_token', fatal=False)
2293 invideo_url = try_get(
2294 player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
2295 if xsrf_token and invideo_url:
2296 xsrf_field_name = self._search_regex(
2297 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
2298 video_webpage, 'xsrf field name',
2299 group='xsrf_field_name', default='session_token')
2300 video_annotations = self._download_webpage(
2301 self._proto_relative_url(invideo_url),
2302 video_id, note='Downloading annotations',
2303 errnote='Unable to download video annotations', fatal=False,
2304 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
2306 chapters = self._extract_chapters(description_original, video_duration)
2308 # Look for the DASH manifest
2309 if self._downloader.params.get('youtube_include_dash_manifest', True):
2310 dash_mpd_fatal = True
2311 for mpd_url in dash_mpds:
2314 def decrypt_sig(mobj):
2316 dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
2317 return '/signature/%s' % dec_s
2319 mpd_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, mpd_url)
2321 for df in self._extract_mpd_formats(
2322 mpd_url, video_id, fatal=dash_mpd_fatal,
2323 formats_dict=self._formats):
2324 if not df.get('filesize'):
2325 df['filesize'] = _extract_filesize(df['url'])
2326 # Do not overwrite DASH format found in some previous DASH manifest
2327 if df['format_id'] not in dash_formats:
2328 dash_formats[df['format_id']] = df
2329 # Additional DASH manifests may end up in HTTP Error 403 therefore
2330 # allow them to fail without bug report message if we already have
2331 # some DASH manifest succeeded. This is temporary workaround to reduce
2332 # burst of bug reports until we figure out the reason and whether it
2333 # can be fixed at all.
2334 dash_mpd_fatal = False
2335 except (ExtractorError, KeyError) as e:
2336 self.report_warning(
2337 'Skipping DASH manifest: %r' % e, video_id)
2339 # Remove the formats we found through non-DASH, they
2340 # contain less info and it can be wrong, because we use
2341 # fixed values (for example the resolution). See
2342 # https://github.com/ytdl-org/youtube-dl/issues/5774 for an
2344 formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
2345 formats.extend(dash_formats.values())
2347 # Check for malformed aspect ratio
2348 stretched_m = re.search(
2349 r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
2352 w = float(stretched_m.group('w'))
2353 h = float(stretched_m.group('h'))
2354 # yt:stretch may hold invalid ratio data (e.g. for Q39EVAstoRM ratio is 17:0).
2355 # We will only process correct ratios.
2359 if f.get('vcodec') != 'none':
2360 f['stretched_ratio'] = ratio
2363 token = extract_token(video_info)
2365 if 'reason' in video_info:
2366 if 'The uploader has not made this video available in your country.' in video_info['reason']:
2367 regions_allowed = self._html_search_meta(
2368 'regionsAllowed', video_webpage, default=None)
2369 countries = regions_allowed.split(',') if regions_allowed else None
2370 self.raise_geo_restricted(
2371 msg=video_info['reason'][0], countries=countries)
2372 reason = video_info['reason'][0]
2373 if 'Invalid parameters' in reason:
2374 unavailable_message = extract_unavailable_message()
2375 if unavailable_message:
2376 reason = unavailable_message
2377 raise ExtractorError(
2378 'YouTube said: %s' % reason,
2379 expected=True, video_id=video_id)
2381 raise ExtractorError(
2382 '"token" parameter not in video info for unknown reason',
2385 if not formats and (video_info.get('license_info') or try_get(player_response, lambda x: x['streamingData']['licenseInfos'])):
2386 raise ExtractorError('This video is DRM protected.', expected=True)
2388 self._sort_formats(formats)
2390 self.mark_watched(video_id, video_info, player_response)
2394 'uploader': video_uploader,
2395 'uploader_id': video_uploader_id,
2396 'uploader_url': video_uploader_url,
2397 'channel_id': channel_id,
2398 'channel_url': channel_url,
2399 'upload_date': upload_date,
2400 'license': video_license,
2401 'creator': video_creator or artist,
2402 'title': video_title,
2403 'alt_title': video_alt_title or track,
2404 'thumbnail': video_thumbnail,
2405 'description': video_description,
2406 'categories': video_categories,
2408 'subtitles': video_subtitles,
2409 'automatic_captions': automatic_captions,
2410 'duration': video_duration,
2411 'age_limit': 18 if age_gate else 0,
2412 'annotations': video_annotations,
2413 'chapters': chapters,
2414 'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
2415 'view_count': view_count,
2416 'like_count': like_count,
2417 'dislike_count': dislike_count,
2418 'average_rating': average_rating,
2421 'start_time': start_time,
2422 'end_time': end_time,
2424 'season_number': season_number,
2425 'episode_number': episode_number,
2429 'release_date': release_date,
2430 'release_year': release_year,
2434 class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
2435 IE_DESC = 'YouTube.com playlists'
2436 _VALID_URL = r"""(?x)(?:
2446 (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/(?:videoseries|[0-9A-Za-z_-]{11}))
2447 \? (?:.*?[&;])*? (?:p|a|list)=
2450 youtu\.be/[0-9A-Za-z_-]{11}\?.*?\blist=
2453 (?:PL|LL|EC|UU|FL|RD|UL|TL|OLAK5uy_)?[0-9A-Za-z-_]{10,}
2454 # Top tracks, they can also include dots
2460 )""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
2461 _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
2462 _VIDEO_RE_TPL = r'href="\s*/watch\?v=%s(?:&(?:[^"]*?index=(?P<index>\d+))?(?:[^>]+>(?P<title>[^<]+))?)?'
2463 _VIDEO_RE = _VIDEO_RE_TPL % r'(?P<id>[0-9A-Za-z_-]{11})'
2464 IE_NAME = 'youtube:playlist'
2466 'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
2468 'title': 'ytdl test PL',
2469 'id': 'PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
2471 'playlist_count': 3,
2473 'url': 'https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
2475 'id': 'PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
2476 'title': 'YDL_Empty_List',
2478 'playlist_count': 0,
2479 'skip': 'This playlist is private',
2481 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
2482 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2484 'title': '29C3: Not my department',
2485 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2486 'uploader': 'Christiaan008',
2487 'uploader_id': 'ChRiStIaAn008',
2489 'playlist_count': 95,
2491 'note': 'issue #673',
2492 'url': 'PLBB231211A4F62143',
2494 'title': '[OLD]Team Fortress 2 (Class-based LP)',
2495 'id': 'PLBB231211A4F62143',
2496 'uploader': 'Wickydoo',
2497 'uploader_id': 'Wickydoo',
2499 'playlist_mincount': 26,
2501 'note': 'Large playlist',
2502 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
2504 'title': 'Uploads from Cauchemar',
2505 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
2506 'uploader': 'Cauchemar',
2507 'uploader_id': 'Cauchemar89',
2509 'playlist_mincount': 799,
2511 'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
2513 'title': 'YDL_safe_search',
2514 'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
2516 'playlist_count': 2,
2517 'skip': 'This playlist is private',
2520 'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
2521 'playlist_count': 4,
2524 'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
2525 'uploader': 'milan',
2526 'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
2529 'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
2530 'playlist_mincount': 485,
2532 'title': '2018 Chinese New Singles (11/6 updated)',
2533 'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
2535 'uploader_id': 'sdragonfang',
2538 'note': 'Embedded SWF player',
2539 'url': 'https://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
2540 'playlist_count': 4,
2543 'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ',
2545 'skip': 'This playlist does not exist',
2547 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
2548 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
2550 'title': 'Uploads from Interstellar Movie',
2551 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
2552 'uploader': 'Interstellar Movie',
2553 'uploader_id': 'InterstellarMovie1',
2555 'playlist_mincount': 21,
2557 # Playlist URL that does not actually serve a playlist
2558 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
2560 'id': 'FqZTN594JQw',
2562 'title': "Smiley's People 01 detective, Adventure Series, Action",
2563 'uploader': 'STREEM',
2564 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
2565 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
2566 'upload_date': '20150526',
2567 'license': 'Standard YouTube License',
2568 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
2569 'categories': ['People & Blogs'],
2573 'dislike_count': int,
2576 'skip_download': True,
2578 'skip': 'This video is not available.',
2579 'add_ie': [YoutubeIE.ie_key()],
2581 'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
2583 'id': 'yeWKywCrFtk',
2585 'title': 'Small Scale Baler and Braiding Rugs',
2586 'uploader': 'Backus-Page House Museum',
2587 'uploader_id': 'backuspagemuseum',
2588 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
2589 'upload_date': '20161008',
2590 'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
2591 'categories': ['Nonprofits & Activism'],
2594 'dislike_count': int,
2598 'skip_download': True,
2601 # https://github.com/ytdl-org/youtube-dl/issues/21844
2602 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2604 'title': 'Data Analysis with Dr Mike Pound',
2605 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2606 'uploader_id': 'Computerphile',
2607 'uploader': 'Computerphile',
2609 'playlist_mincount': 11,
2611 'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
2612 'only_matching': True,
2614 'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
2615 'only_matching': True,
2617 # music album playlist
2618 'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
2619 'only_matching': True,
2621 'url': 'https://invidio.us/playlist?list=PLDIoUOhQQPlXr63I_vwF9GD8sAKh77dWU',
2622 'only_matching': True,
2625 def _real_initialize(self):
2628 def extract_videos_from_page(self, page):
2632 for item in re.findall(
2633 r'(<[^>]*\bdata-video-id\s*=\s*["\'][0-9A-Za-z_-]{11}[^>]+>)', page):
2634 attrs = extract_attributes(item)
2635 video_id = attrs['data-video-id']
2636 video_title = unescapeHTML(attrs.get('data-title'))
2638 video_title = video_title.strip()
2639 ids_in_page.append(video_id)
2640 titles_in_page.append(video_title)
2642 # Fallback with old _VIDEO_RE
2643 self.extract_videos_from_page_impl(
2644 self._VIDEO_RE, page, ids_in_page, titles_in_page)
2647 self.extract_videos_from_page_impl(
2648 r'href="\s*/watch\?v\s*=\s*(?P<id>[0-9A-Za-z_-]{11})', page,
2649 ids_in_page, titles_in_page)
2650 self.extract_videos_from_page_impl(
2651 r'data-video-ids\s*=\s*["\'](?P<id>[0-9A-Za-z_-]{11})', page,
2652 ids_in_page, titles_in_page)
2654 return zip(ids_in_page, titles_in_page)
2656 def _extract_mix(self, playlist_id):
2657 # The mixes are generated from a single video
2658 # the id of the playlist is just 'RD' + video_id
2660 last_id = playlist_id[-11:]
2661 for n in itertools.count(1):
2662 url = 'https://youtube.com/watch?v=%s&list=%s' % (last_id, playlist_id)
2663 webpage = self._download_webpage(
2664 url, playlist_id, 'Downloading page {0} of Youtube mix'.format(n))
2665 new_ids = orderedSet(re.findall(
2666 r'''(?xs)data-video-username=".*?".*?
2667 href="/watch\?v=([0-9A-Za-z_-]{11})&[^"]*?list=%s''' % re.escape(playlist_id),
2669 # Fetch new pages until all the videos are repeated, it seems that
2670 # there are always 51 unique videos.
2671 new_ids = [_id for _id in new_ids if _id not in ids]
2677 url_results = self._ids_to_results(ids)
2679 search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage)
2681 search_title('playlist-title')
2682 or search_title('title long-title')
2683 or search_title('title'))
2684 title = clean_html(title_span)
2686 return self.playlist_result(url_results, playlist_id, title)
2688 def _extract_playlist(self, playlist_id):
2689 url = self._TEMPLATE_URL % playlist_id
2690 page = self._download_webpage(url, playlist_id)
2692 # the yt-alert-message now has tabindex attribute (see https://github.com/ytdl-org/youtube-dl/issues/11604)
2693 for match in re.findall(r'<div class="yt-alert-message"[^>]*>([^<]+)</div>', page):
2694 match = match.strip()
2695 # Check if the playlist exists or is private
2696 mobj = re.match(r'[^<]*(?:The|This) playlist (?P<reason>does not exist|is private)[^<]*', match)
2698 reason = mobj.group('reason')
2699 message = 'This playlist %s' % reason
2700 if 'private' in reason:
2701 message += ', use --username or --netrc to access it'
2703 raise ExtractorError(message, expected=True)
2704 elif re.match(r'[^<]*Invalid parameters[^<]*', match):
2705 raise ExtractorError(
2706 'Invalid parameters. Maybe URL is incorrect.',
2708 elif re.match(r'[^<]*Choose your language[^<]*', match):
2711 self.report_warning('Youtube gives an alert message: ' + match)
2713 playlist_title = self._html_search_regex(
2714 r'(?s)<h1 class="pl-header-title[^"]*"[^>]*>\s*(.*?)\s*</h1>',
2715 page, 'title', default=None)
2717 _UPLOADER_BASE = r'class=["\']pl-header-details[^>]+>\s*<li>\s*<a[^>]+\bhref='
2718 uploader = self._search_regex(
2719 r'%s["\']/(?:user|channel)/[^>]+>([^<]+)' % _UPLOADER_BASE,
2720 page, 'uploader', default=None)
2722 r'%s(["\'])(?P<path>/(?:user|channel)/(?P<uploader_id>.+?))\1' % _UPLOADER_BASE,
2725 uploader_id = mobj.group('uploader_id')
2726 uploader_url = compat_urlparse.urljoin(url, mobj.group('path'))
2728 uploader_id = uploader_url = None
2732 if not playlist_title:
2734 # Some playlist URLs don't actually serve a playlist (e.g.
2735 # https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4)
2736 next(self._entries(page, playlist_id))
2737 except StopIteration:
2740 playlist = self.playlist_result(
2741 self._entries(page, playlist_id), playlist_id, playlist_title)
2743 'uploader': uploader,
2744 'uploader_id': uploader_id,
2745 'uploader_url': uploader_url,
2748 return has_videos, playlist
2750 def _check_download_just_video(self, url, playlist_id):
2751 # Check if it's a video-specific URL
2752 query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
2753 video_id = query_dict.get('v', [None])[0] or self._search_regex(
2754 r'(?:(?:^|//)youtu\.be/|youtube\.com/embed/(?!videoseries))([0-9A-Za-z_-]{11})', url,
2755 'video id', default=None)
2757 if self._downloader.params.get('noplaylist'):
2758 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
2759 return video_id, self.url_result(video_id, 'Youtube', video_id=video_id)
2761 self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
2762 return video_id, None
2765 def _real_extract(self, url):
2766 # Extract playlist id
2767 mobj = re.match(self._VALID_URL, url)
2769 raise ExtractorError('Invalid URL: %s' % url)
2770 playlist_id = mobj.group(1) or mobj.group(2)
2772 video_id, video = self._check_download_just_video(url, playlist_id)
2776 if playlist_id.startswith(('RD', 'UL', 'PU')):
2777 # Mixes require a custom extraction process
2778 return self._extract_mix(playlist_id)
2780 has_videos, playlist = self._extract_playlist(playlist_id)
2781 if has_videos or not video_id:
2784 # Some playlist URLs don't actually serve a playlist (see
2785 # https://github.com/ytdl-org/youtube-dl/issues/10537).
2786 # Fallback to plain video extraction if there is a video id
2787 # along with playlist id.
2788 return self.url_result(video_id, 'Youtube', video_id=video_id)
# NOTE(review): partial extract -- the original line numbers (left column)
# jump, so lines are missing inside this class and indentation is flattened.
# Comments describe only what is visible; code lines are byte-identical.
2791 class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
2792 IE_DESC = 'YouTube.com channels'
2793 _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com|(?:www\.)?invidio\.us)/channel/(?P<id>[0-9A-Za-z_-]+)'
2794 _TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos'
2795 _VIDEO_RE = r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?'
2796 IE_NAME = 'youtube:channel'
# Test-case entries follow (the _TESTS = [...] scaffolding is presumably
# among the missing lines -- TODO confirm against the full file).
2798 'note': 'paginated channel',
2799 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
2800 'playlist_mincount': 91,
2802 'id': 'UUKfVa3S1e4PHvxWcwyMMg8w',
2803 'title': 'Uploads from lex will',
2804 'uploader': 'lex will',
2805 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2808 'note': 'Age restricted channel',
2809 # from https://www.youtube.com/user/DeusExOfficial
2810 'url': 'https://www.youtube.com/channel/UCs0ifCMCm1icqRbqhUINa0w',
2811 'playlist_mincount': 64,
2813 'id': 'UUs0ifCMCm1icqRbqhUINa0w',
2814 'title': 'Uploads from Deus Ex',
2815 'uploader': 'Deus Ex',
2816 'uploader_id': 'DeusExOfficial',
2819 'url': 'https://invidio.us/channel/UC23qupoDRn9YOAVzeoxjOQA',
2820 'only_matching': True,
# Defer to more specific extractors for playlists/live URLs; the classmethod
# decorator line is presumably among the missing lines -- TODO confirm.
2824 def suitable(cls, url):
2825 return (False if YoutubePlaylistsIE.suitable(url) or YoutubeLiveIE.suitable(url)
2826 else super(YoutubeChannelIE, cls).suitable(url))
# Hook so subclasses (e.g. the user extractor) can build a different
# listing URL; here it just fills _TEMPLATE_URL with the channel id.
2828 def _build_template_url(self, url, channel_id):
2829 return self._TEMPLATE_URL % channel_id
# Entry point: prefer redirecting a 'UC...' channel to its 'UU...' uploads
# playlist (page-by-page channel listing is capped at ~1050 videos, #5778);
# otherwise fall back to scraping the channel's /videos pages directly.
2831 def _real_extract(self, url):
2832 channel_id = self._match_id(url)
2834 url = self._build_template_url(url, channel_id)
2836 # Channel by page listing is restricted to 35 pages of 30 items, i.e. 1050 videos total (see #5778)
2837 # Workaround by extracting as a playlist if managed to obtain channel playlist URL
2838 # otherwise fallback on channel by page extraction
2839 channel_page = self._download_webpage(
2840 url + '?view=57', channel_id,
2841 'Downloading channel page', fatal=False)
2842 if channel_page is False:
2843 channel_playlist_id = False
2845 channel_playlist_id = self._html_search_meta(
2846 'channelId', channel_page, 'channel id', default=None)
2847 if not channel_playlist_id:
# Secondary source for the channel id: mobile-app deep-link meta tags.
2848 channel_url = self._html_search_meta(
2849 ('al:ios:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad'),
2850 channel_page, 'channel url', default=None)
2852 channel_playlist_id = self._search_regex(
2853 r'vnd\.youtube://user/([0-9A-Za-z_-]+)',
2854 channel_url, 'channel id', default=None)
# 'UC' + x -> uploads playlist id 'UU' + x; hand off to the playlist IE.
2855 if channel_playlist_id and channel_playlist_id.startswith('UC'):
2856 playlist_id = 'UU' + channel_playlist_id[2:]
2857 return self.url_result(
2858 compat_urlparse.urljoin(url, '/playlist?list=%s' % playlist_id), 'YoutubePlaylist')
2860 channel_page = self._download_webpage(url, channel_id, 'Downloading page #1')
2861 autogenerated = re.search(r'''(?x)
2863 channel-header-autogenerated-label|
2864 yt-channel-title-autogenerated
2865 )[^"]*"''', channel_page) is not None
2868 # The videos are contained in a single page
2869 # the ajax pages can't be used, they are empty
2872 video_id, 'Youtube', video_id=video_id,
2873 video_title=video_title)
2874 for video_id, video_title in self.extract_videos_from_page(channel_page)]
2875 return self.playlist_result(entries, channel_id)
# Probe for an empty channel so a page-level alert (e.g. geo restriction)
# can be surfaced as an error instead of an empty playlist.
2878 next(self._entries(channel_page, channel_id))
2879 except StopIteration:
2880 alert_message = self._html_search_regex(
2881 r'(?s)<div[^>]+class=(["\']).*?\byt-alert-message\b.*?\1[^>]*>(?P<alert>[^<]+)</div>',
2882 channel_page, 'alert', default=None, group='alert')
2884 raise ExtractorError('Youtube said: %s' % alert_message, expected=True)
2886 return self.playlist_result(self._entries(channel_page, channel_id), channel_id)
class YoutubeUserIE(YoutubeChannelIE):
    """Extract a user's (or /c/ custom-URL channel's) uploaded videos.

    Also matches the "ytuser:NAME" shorthand.
    """
    IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:(?P<user>user|c)/)?(?!(?:attribution_link|watch|results|shared)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/%s/%s/videos'
    IE_NAME = 'youtube:user'

        'url': 'https://www.youtube.com/user/TheLinuxFoundation',
        'playlist_mincount': 320,
            'id': 'UUfX55Sx5hEFjoC3cNs6mCUQ',
            'title': 'Uploads from The Linux Foundation',
            'uploader': 'The Linux Foundation',
            'uploader_id': 'TheLinuxFoundation',
        # Only available via https://www.youtube.com/c/12minuteathlete/videos
        # but not https://www.youtube.com/user/12minuteathlete/videos
        'url': 'https://www.youtube.com/c/12minuteathlete/videos',
        'playlist_mincount': 249,
            'id': 'UUVjM-zV6_opMDx7WYxnjZiQ',
            'title': 'Uploads from 12 Minute Athlete',
            'uploader': '12 Minute Athlete',
            'uploader_id': 'the12minuteathlete',
        'url': 'ytuser:phihag',
        'only_matching': True,
        'url': 'https://www.youtube.com/c/gametrailers',
        'only_matching': True,
        'url': 'https://www.youtube.com/gametrailers',
        'only_matching': True,
        # This channel is not available, geo restricted to JP
        'url': 'https://www.youtube.com/user/kananishinoSMEJ/videos',
        'only_matching': True,

    def suitable(cls, url):
        # Don't return True if the url can be extracted with other youtube
        # extractors; the regex is too permissive and it would match.
        other_yt_ies = iter(klass for (name, klass) in globals().items() if name.startswith('Youtube') and name.endswith('IE') and klass is not cls)
        if any(ie.suitable(url) for ie in other_yt_ies):

        return super(YoutubeUserIE, cls).suitable(url)

    def _build_template_url(self, url, channel_id):
        # Fill the path kind ('user' or 'c') and the name into the template;
        # a plain youtube.com/NAME URL has no <user> group and defaults to 'user'.
        mobj = re.match(self._VALID_URL, url)
        return self._TEMPLATE_URL % (mobj.group('user') or 'user', mobj.group('id'))
class YoutubeLiveIE(YoutubeBaseInfoExtractor):
    """Resolve a channel's /live URL to its current live stream video."""
    IE_DESC = 'YouTube.com live streams'
    _VALID_URL = r'(?P<base_url>https?://(?:\w+\.)?youtube\.com/(?:(?:user|channel|c)/)?(?P<id>[^/]+))/live'
    IE_NAME = 'youtube:live'

        'url': 'https://www.youtube.com/user/TheYoungTurks/live',
            'id': 'a48o2S1cPoo',
            'title': 'The Young Turks - Live Main Show',
            'uploader': 'The Young Turks',
            'uploader_id': 'TheYoungTurks',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
            'upload_date': '20150715',
            'license': 'Standard YouTube License',
            'description': 'md5:438179573adcdff3c97ebb1ee632b891',
            'categories': ['News & Politics'],
            'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
            'dislike_count': int,
            'skip_download': True,
        'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
        'only_matching': True,
        'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
        'only_matching': True,
        'url': 'https://www.youtube.com/TheYoungTurks/live',
        'only_matching': True,

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        channel_id = mobj.group('id')
        base_url = mobj.group('base_url')
        webpage = self._download_webpage(url, channel_id, fatal=False)
            # A live stream page is an og:type 'video…' page carrying the
            # stream's 11-character video id in a <meta> tag.
            page_type = self._og_search_property(
                'type', webpage, 'page type', default='')
            video_id = self._html_search_meta(
                'videoId', webpage, 'video id', default=None)
            if page_type.startswith('video') and video_id and re.match(
                    r'^[0-9A-Za-z_-]{11}$', video_id):
                return self.url_result(video_id, YoutubeIE.ie_key())
        # No live stream found (or page download failed): hand the bare
        # channel/user URL back for regular extraction.
        return self.url_result(base_url)
class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
    """List all playlists of a user or channel (the /playlists tab)."""
    IE_DESC = 'YouTube.com user/channel playlists'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/(?:user|channel)/(?P<id>[^/]+)/playlists'
    IE_NAME = 'youtube:playlists'

        'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
        'playlist_mincount': 4,
            'id': 'ThirstForScience',
            'title': 'ThirstForScience',
        # with "Load more" button
        'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
        'playlist_mincount': 70,
            'title': 'Игорь Клейнер',
        'url': 'https://www.youtube.com/channel/UCiU1dHvZObB2iP6xkJ__Icw/playlists',
        'playlist_mincount': 17,
            'id': 'UCiU1dHvZObB2iP6xkJ__Icw',
            'title': 'Chem Player',
class YoutubeSearchBaseInfoExtractor(YoutubePlaylistBaseInfoExtractor):
    """Shared base for search extractors: the result-link pattern."""
    # Matches /watch?v=… anchors on a results page; the optional <title>
    # group picks up the anchor's title="…" attribute when present.
    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})(?:[^"]*"[^>]+\btitle="(?P<title>[^"]+))?'
class YoutubeSearchIE(SearchInfoExtractor, YoutubeSearchBaseInfoExtractor):
    """Search extractor for the "ytsearch" keyword."""
    IE_DESC = 'YouTube.com searches'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    _EXTRA_QUERY_ARGS = {}

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
            'search_query': query.encode('utf-8'),
        url_query.update(self._EXTRA_QUERY_ARGS)
        result_url = 'https://www.youtube.com/results?' + compat_urllib_parse_urlencode(url_query)

        for pagenum in itertools.count(1):
            # With spf=navigate the results page returns JSON; the rendered
            # HTML body lives in the second array element.
            data = self._download_json(
                result_url, video_id='query "%s"' % query,
                note='Downloading page %s' % pagenum,
                errnote='Unable to download API page',
                query={'spf': 'navigate'})
            html_content = data[1]['body']['content']

            # A "search-message" box means YouTube reported no results.
            if 'class="search-message' in html_content:
                raise ExtractorError(
                    '[youtube] No video results', expected=True)

            new_videos = list(self._process_page(html_content))
            videos += new_videos
            if not new_videos or len(videos) > limit:
            # Follow the "Next" pagination button when present.
            next_link = self._html_search_regex(
                r'href="(/results\?[^"]*\bsp=[^"]+)"[^>]*>\s*<span[^>]+class="[^"]*\byt-uix-button-content\b[^"]*"[^>]*>Next',
                html_content, 'next link', default=None)
            if next_link is None:
            result_url = compat_urlparse.urljoin('https://www.youtube.com/', next_link)

        return self.playlist_result(videos, query)
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Search variant ("ytsearchdate") that orders results by upload date."""
    _SEARCH_KEY = 'ytsearchdate'
    _EXTRA_QUERY_ARGS = {'search_sort': 'video_date_uploaded'}
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    IE_DESC = 'YouTube.com searches, newest videos first'
class YoutubeSearchURLIE(YoutubeSearchBaseInfoExtractor):
    """Extract results from a youtube.com/results?search_query=… URL."""
    IE_DESC = 'YouTube.com search URLs'
    IE_NAME = 'youtube:search_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)'
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
            'title': 'youtube-dl test video',
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        # The query is percent/plus-encoded in the URL; decode it for use
        # as the playlist title and the download note.
        query = compat_urllib_parse_unquote_plus(mobj.group('query'))
        webpage = self._download_webpage(url, query)
        return self.playlist_result(self._process_page(webpage), playlist_title=query)
class YoutubeShowIE(YoutubePlaylistsBaseInfoExtractor):
    """Extract all seasons of a YouTube show as playlists."""
    IE_DESC = 'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/show/(?P<id>[^?#]*)'
    IE_NAME = 'youtube:show'
        'url': 'https://www.youtube.com/show/airdisasters',
        'playlist_mincount': 5,
            'id': 'airdisasters',
            'title': 'Air Disasters',

    def _real_extract(self, url):
        playlist_id = self._match_id(url)
        # A show's seasons live on its /playlists page; reuse the
        # playlists-listing extraction on that URL.
        return super(YoutubeShowIE, self)._real_extract(
            'https://www.youtube.com/show/%s/playlists' % playlist_id)
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
    """
    _LOGIN_REQUIRED = True

        # IE_NAME is derived from the subclass's feed name.
        return 'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):

    def _entries(self, page):
        # The extraction process is the same as for playlists, but the regex
        # for the video ids doesn't contain an index
        more_widget_html = content_html = page
        for page_num in itertools.count(1):
            matches = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)

            # 'recommended' feed has infinite 'load more' and each new portion spins
            # the same videos in (sometimes) slightly different order, so we'll check
            # for unicity and break when portion has no new videos
            new_ids = list(filter(lambda video_id: video_id not in ids, orderedSet(matches)))

            for entry in self._ids_to_results(new_ids):

            mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)

            # Fetch the next "load more" portion; it comes back as JSON with
            # the rendered HTML and the next widget markup.
            more = self._download_json(
                'https://youtube.com/%s' % mobj.group('more'), self._PLAYLIST_TITLE,
                'Downloading page #%s' % page_num,
                transform_source=uppercase_escape)
            content_html = more['content_html']
            more_widget_html = more['load_more_widget_html']

    def _real_extract(self, url):
        page = self._download_webpage(
            'https://www.youtube.com/feed/%s' % self._FEED_NAME,
            self._PLAYLIST_TITLE)
        return self.playlist_result(
            self._entries(page), playlist_title=self._PLAYLIST_TITLE)
class YoutubeWatchLaterIE(YoutubePlaylistIE):
    """Extract the authenticated user's Watch Later list (":ytwatchlater")."""
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:feed/watch_later|(?:playlist|watch)\?(?:.+&)?list=WL)|:ytwatchlater'

        'url': 'https://www.youtube.com/playlist?list=WL',
        'only_matching': True,
        'url': 'https://www.youtube.com/watch?v=bCNU9TrbiRk&index=1&list=WL',
        'only_matching': True,

    def _real_extract(self, url):
        # A watch URL with list=WL may mean "just this one video"; check
        # that first, otherwise extract the whole 'WL' playlist.
        _, video = self._check_download_just_video(url, 'WL')
        _, playlist = self._extract_playlist('WL')
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Extract the authenticated user's favourite videos (":ytfav")."""
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
    # Without login there is no favourites page to scrape.
    _LOGIN_REQUIRED = True

    def _real_extract(self, url):
        # The favourites feed is backed by a regular playlist; scrape its id
        # from the page and delegate to the playlist extractor.
        favourites_page = self._download_webpage(
            'https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        favourites_list_id = self._search_regex(
            r'list=(.+?)["&]', favourites_page, 'favourites playlist id')
        return self.url_result(favourites_list_id, 'YoutubePlaylist')
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the authenticated user's recommendations (":ytrec")."""
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = 'Youtube Recommended videos'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/recommended|:ytrec(?:ommended)?'
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the authenticated user's subscriptions ("ytsubs")."""
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = 'Youtube Subscriptions'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
    IE_DESC = 'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the authenticated user's watch history (":ythistory")."""
    _FEED_NAME = 'history'
    _PLAYLIST_TITLE = 'Youtube History'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/history|:ythistory'
    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch obviously-truncated watch URLs and fail with a helpful hint."""
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
            annotation_id=annotation_[^&]+|
            attribution_link\?a=[^&]+

        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,

    def _real_extract(self, url):
        # Deliberately refuse: the URL lost its video id, usually because an
        # unquoted '&' cut the command line short.
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply youtube-dl BaW_jenozKc .',
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch watch URLs whose video id is shorter than the required 11 chars."""
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,

    def _real_extract(self, url):
        video_id = self._match_id(url)
        # Anything shorter than 11 characters cannot be a valid video id.
        raise ExtractorError(
            'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url),