3 from __future__ import unicode_literals
14 from .common import InfoExtractor, SearchInfoExtractor
15 from ..jsinterp import JSInterpreter
16 from ..swfinterp import SWFInterpreter
17 from ..compat import (
22 compat_urllib_parse_unquote,
23 compat_urllib_parse_unquote_plus,
24 compat_urllib_parse_urlencode,
25 compat_urllib_parse_urlparse,
37 get_element_by_attribute,
60 class YoutubeBaseInfoExtractor(InfoExtractor):
61 """Provide base functions for Youtube extractors"""
62 _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
63 _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'
65 _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
66 _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
67 _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'
69 _NETRC_MACHINE = 'youtube'
70 # If True it will raise an error if no login info is provided
71 _LOGIN_REQUIRED = False
73 _PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL|OLAK5uy_)[0-9A-Za-z-_]{10,}'
75 def _set_language(self):
77 '.youtube.com', 'PREF', 'f1=50000000&hl=en',
78 # YouTube sets the expire time to about two months
79 expire_time=time.time() + 2 * 30 * 24 * 3600)
81 def _ids_to_results(self, ids):
83 self.url_result(vid_id, 'Youtube', video_id=vid_id)
88 Attempt to log in to YouTube.
89 True is returned if successful or skipped.
90 False is returned if login failed.
92 If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
94 username, password = self._get_login_info()
95 # No authentication to be performed
97 if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
98 raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
101 login_page = self._download_webpage(
102 self._LOGIN_URL, None,
103 note='Downloading login page',
104 errnote='unable to fetch login page', fatal=False)
105 if login_page is False:
108 login_form = self._hidden_inputs(login_page)
110 def req(url, f_req, note, errnote):
111 data = login_form.copy()
114 'checkConnection': 'youtube',
115 'checkedDomains': 'youtube',
117 'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
118 'f.req': json.dumps(f_req),
119 'flowName': 'GlifWebSignIn',
120 'flowEntry': 'ServiceLogin',
121 # TODO: reverse actual botguard identifier generation algo
122 'bgRequest': '["identifier",""]',
124 return self._download_json(
125 url, None, note=note, errnote=errnote,
126 transform_source=lambda s: re.sub(r'^[^[]*', '', s),
128 data=urlencode_postdata(data), headers={
129 'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
130 'Google-Accounts-XSRF': 1,
134 self._downloader.report_warning(message)
138 None, [], None, 'US', None, None, 2, False, True,
142 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
144 1, [None, None, []], None, None, None, True
149 lookup_results = req(
150 self._LOOKUP_URL, lookup_req,
151 'Looking up account info', 'Unable to look up account info')
153 if lookup_results is False:
156 user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
158 warn('Unable to extract user hash')
163 None, 1, None, [1, None, None, None, [password, None, True]],
165 None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
166 1, [None, None, []], None, None, None, True
169 challenge_results = req(
170 self._CHALLENGE_URL, challenge_req,
171 'Logging in', 'Unable to log in')
173 if challenge_results is False:
176 login_res = try_get(challenge_results, lambda x: x[0][5], list)
178 login_msg = try_get(login_res, lambda x: x[5], compat_str)
180 'Unable to login: %s' % 'Invalid password'
181 if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
184 res = try_get(challenge_results, lambda x: x[0][-1], list)
186 warn('Unable to extract result entry')
189 login_challenge = try_get(res, lambda x: x[0][0], list)
191 challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
192 if challenge_str == 'TWO_STEP_VERIFICATION':
193 # SEND_SUCCESS - TFA code has been successfully sent to phone
194 # QUOTA_EXCEEDED - reached the limit of TFA codes
195 status = try_get(login_challenge, lambda x: x[5], compat_str)
196 if status == 'QUOTA_EXCEEDED':
197 warn('Exceeded the limit of TFA codes, try later')
200 tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
202 warn('Unable to extract TL')
205 tfa_code = self._get_tfa_info('2-step verification code')
209 'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
210 '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
213 tfa_code = remove_start(tfa_code, 'G-')
216 user_hash, None, 2, None,
218 9, None, None, None, None, None, None, None,
219 [None, tfa_code, True, 2]
223 self._TFA_URL.format(tl), tfa_req,
224 'Submitting TFA code', 'Unable to submit TFA code')
226 if tfa_results is False:
229 tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
231 tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
233 'Unable to finish TFA: %s' % 'Invalid TFA code'
234 if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
237 check_cookie_url = try_get(
238 tfa_results, lambda x: x[0][-1][2], compat_str)
241 'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
242 'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
243 'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
245 challenge = CHALLENGES.get(
247 '%s returned error %s.' % (self.IE_NAME, challenge_str))
248 warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
251 check_cookie_url = try_get(res, lambda x: x[2], compat_str)
253 if not check_cookie_url:
254 warn('Unable to extract CheckCookie URL')
257 check_cookie_results = self._download_webpage(
258 check_cookie_url, None, 'Checking cookie', fatal=False)
260 if check_cookie_results is False:
263 if 'https://myaccount.google.com/' not in check_cookie_results:
264 warn('Unable to log in')
269 def _download_webpage_handle(self, *args, **kwargs):
270 query = kwargs.get('query', {}).copy()
271 query['disable_polymer'] = 'true'
272 kwargs['query'] = query
273 return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
274 *args, **compat_kwargs(kwargs))
276 def _real_initialize(self):
277 if self._downloader is None:
280 if not self._login():
284 class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
285 # Extract entries from page with "Load more" button
286 def _entries(self, page, playlist_id):
287 more_widget_html = content_html = page
288 for page_num in itertools.count(1):
289 for entry in self._process_page(content_html):
292 mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
298 while count <= retries:
300 # Downloading page may result in intermittent 5xx HTTP error
301 # that is usually worked around with a retry
302 more = self._download_json(
303 'https://youtube.com/%s' % mobj.group('more'), playlist_id,
304 'Downloading page #%s%s'
305 % (page_num, ' (retry #%d)' % count if count else ''),
306 transform_source=uppercase_escape)
308 except ExtractorError as e:
309 if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503):
315 content_html = more['content_html']
316 if not content_html.strip():
317 # Some webpages show a "Load more" button but they don't
320 more_widget_html = more['load_more_widget_html']
323 class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
324 def _process_page(self, content):
325 for video_id, video_title in self.extract_videos_from_page(content):
326 yield self.url_result(video_id, 'Youtube', video_id, video_title)
328 def extract_videos_from_page_impl(self, video_re, page, ids_in_page, titles_in_page):
329 for mobj in re.finditer(video_re, page):
330 # The link with index 0 is not the first video of the playlist (not sure if still actual)
331 if 'index' in mobj.groupdict() and mobj.group('id') == '0':
333 video_id = mobj.group('id')
334 video_title = unescapeHTML(
335 mobj.group('title')) if 'title' in mobj.groupdict() else None
337 video_title = video_title.strip()
338 if video_title == '► Play all':
341 idx = ids_in_page.index(video_id)
342 if video_title and not titles_in_page[idx]:
343 titles_in_page[idx] = video_title
345 ids_in_page.append(video_id)
346 titles_in_page.append(video_title)
348 def extract_videos_from_page(self, page):
351 self.extract_videos_from_page_impl(
352 self._VIDEO_RE, page, ids_in_page, titles_in_page)
353 return zip(ids_in_page, titles_in_page)
356 class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
357 def _process_page(self, content):
358 for playlist_id in orderedSet(re.findall(
359 r'<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*><a[^>]+href="/?playlist\?list=([0-9A-Za-z-_]{10,})"',
361 yield self.url_result(
362 'https://www.youtube.com/playlist?list=%s' % playlist_id, 'YoutubePlaylist')
364 def _real_extract(self, url):
365 playlist_id = self._match_id(url)
366 webpage = self._download_webpage(url, playlist_id)
367 title = self._og_search_title(webpage, fatal=False)
368 return self.playlist_result(self._entries(webpage, playlist_id), playlist_id, title)
371 class YoutubeIE(YoutubeBaseInfoExtractor):
372 IE_DESC = 'YouTube.com'
373 _VALID_URL = r"""(?x)^
375 (?:https?://|//) # http(s):// or protocol-independent URL
376 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|
377 (?:www\.)?deturl\.com/www\.youtube\.com/|
378 (?:www\.)?pwnyoutube\.com/|
379 (?:www\.)?hooktube\.com/|
380 (?:www\.)?yourepeat\.com/|
381 tube\.majestyc\.net/|
382 # Invidious instances taken from https://github.com/omarroth/invidious/wiki/Invidious-Instances
383 (?:(?:www|dev)\.)?invidio\.us/|
384 (?:(?:www|no)\.)?invidiou\.sh/|
385 (?:(?:www|fi|de)\.)?invidious\.snopyta\.org/|
386 (?:www\.)?invidious\.kabi\.tk/|
387 (?:www\.)?invidious\.enkirton\.net/|
388 (?:www\.)?invidious\.13ad\.de/|
389 (?:www\.)?invidious\.mastodon\.host/|
390 (?:www\.)?tube\.poal\.co/|
391 (?:www\.)?vid\.wxzm\.sx/|
392 youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
393 (?:.*?\#/)? # handle anchor (#/) redirect urls
394 (?: # the various things that can precede the ID:
395 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
396 |(?: # or the v= param in all its forms
397 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
398 (?:\?|\#!?) # the params delimiter ? or # or #!
399 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&v=V36LpHqtcDY)
404 youtu\.be| # just youtu.be/xxxx
405 vid\.plus| # or vid.plus/xxxx
406 zwearz\.com/watch| # or zwearz.com/watch/xxxx
408 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
410 )? # all until now is optional -> you can pass the naked ID
411 ([0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
414 %(playlist_id)s| # combined list/video URLs are handled by the playlist IE
415 WL # WL are handled by the watch later IE
418 (?(1).+)? # if we found the ID, everything can follow
419 $""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
420 _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
422 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
423 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
424 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
425 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
426 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
427 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
428 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
429 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
430 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
431 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
432 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
433 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
434 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
435 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
436 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
437 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
438 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
439 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
443 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
444 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
445 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
446 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
447 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
448 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
449 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
451 # Apple HTTP Live Streaming
452 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
453 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
454 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
455 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
456 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
457 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
458 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
459 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
462 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
463 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
464 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
465 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
466 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
467 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
468 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
469 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
470 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
471 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
472 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
473 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
476 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
477 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
478 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
479 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
480 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
481 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
482 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
485 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
486 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
487 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
488 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
489 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
490 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
491 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
492 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
493 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
494 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
495 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
496 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
497 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
498 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
499 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
500 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
501 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
502 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
503 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
504 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
505 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
506 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
509 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
510 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
512 # Dash webm audio with opus inside
513 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
514 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
515 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
518 '_rtmp': {'protocol': 'rtmp'},
520 # av01 video only formats sometimes served with "unknown" codecs
521 '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
522 '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
523 '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
524 '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
526 _SUBTITLE_FORMATS = ('srv1', 'srv2', 'srv3', 'ttml', 'vtt')
533 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
537 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
538 'uploader': 'Philipp Hagemeister',
539 'uploader_id': 'phihag',
540 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
541 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
542 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
543 'upload_date': '20121002',
544 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
545 'categories': ['Science & Technology'],
546 'tags': ['youtube-dl'],
550 'dislike_count': int,
556 'url': 'https://www.youtube.com/watch?v=UxxajLWwzqY',
557 'note': 'Test generic use_cipher_signature video (#897)',
561 'upload_date': '20120506',
562 'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
563 'alt_title': 'I Love It (feat. Charli XCX)',
564 'description': 'md5:f3ceb5ef83a08d95b9d146f973157cc8',
565 'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
566 'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
567 'iconic ep', 'iconic', 'love', 'it'],
569 'uploader': 'Icona Pop',
570 'uploader_id': 'IconaPop',
571 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IconaPop',
572 'creator': 'Icona Pop',
573 'track': 'I Love It (feat. Charli XCX)',
574 'artist': 'Icona Pop',
578 'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ',
579 'note': 'Test VEVO video with age protection (#956)',
583 'upload_date': '20130703',
584 'title': 'Justin Timberlake - Tunnel Vision (Official Music Video) (Explicit)',
585 'alt_title': 'Tunnel Vision',
586 'description': 'md5:07dab3356cde4199048e4c7cd93471e1',
588 'uploader': 'justintimberlakeVEVO',
589 'uploader_id': 'justintimberlakeVEVO',
590 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/justintimberlakeVEVO',
591 'creator': 'Justin Timberlake',
592 'track': 'Tunnel Vision',
593 'artist': 'Justin Timberlake',
598 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
599 'note': 'Embed-only video (#1746)',
603 'upload_date': '20120608',
604 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
605 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
606 'uploader': 'SET India',
607 'uploader_id': 'setindia',
608 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
613 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=UxxajLWwzqY',
614 'note': 'Use the first video ID in the URL',
618 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
619 'uploader': 'Philipp Hagemeister',
620 'uploader_id': 'phihag',
621 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
622 'upload_date': '20121002',
623 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
624 'categories': ['Science & Technology'],
625 'tags': ['youtube-dl'],
629 'dislike_count': int,
632 'skip_download': True,
636 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
637 'note': '256k DASH audio (format 141) via DASH manifest',
641 'upload_date': '20121002',
642 'uploader_id': '8KVIDEO',
643 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
645 'uploader': '8KVIDEO',
646 'title': 'UHDTV TEST 8K VIDEO.mp4'
649 'youtube_include_dash_manifest': True,
652 'skip': 'format 141 not served anymore',
654 # DASH manifest with encrypted signature
656 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
660 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
661 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
663 'uploader': 'AfrojackVEVO',
664 'uploader_id': 'AfrojackVEVO',
665 'upload_date': '20131011',
668 'youtube_include_dash_manifest': True,
669 'format': '141/bestaudio[ext=m4a]',
672 # JS player signature function name containing $
674 'url': 'https://www.youtube.com/watch?v=nfWlot6h_JM',
678 'title': 'Taylor Swift - Shake It Off',
679 'description': 'md5:bec2185232c05479482cb5a9b82719bf',
681 'uploader': 'TaylorSwiftVEVO',
682 'uploader_id': 'TaylorSwiftVEVO',
683 'upload_date': '20140818',
684 'creator': 'Taylor Swift',
687 'youtube_include_dash_manifest': True,
688 'format': '141/bestaudio[ext=m4a]',
693 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
698 'upload_date': '20100909',
699 'uploader': 'Amazing Atheist',
700 'uploader_id': 'TheAmazingAtheist',
701 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
702 'title': 'Burning Everyone\'s Koran',
703 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
706 # Normal age-gate video (No vevo, embed allowed)
708 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
712 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
713 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
715 'uploader': 'The Witcher',
716 'uploader_id': 'WitcherGame',
717 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
718 'upload_date': '20140605',
722 # Age-gate video with encrypted signature
724 'url': 'https://www.youtube.com/watch?v=6kLq3WMV1nU',
728 'title': 'Dedication To My Ex (Miss That) (Lyric Video)',
729 'description': 'md5:33765bb339e1b47e7e72b5490139bb41',
731 'uploader': 'LloydVEVO',
732 'uploader_id': 'LloydVEVO',
733 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/LloydVEVO',
734 'upload_date': '20110629',
738 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
739 # YouTube Red ad is not captured for creator
741 'url': '__2ABJjxzNo',
746 'upload_date': '20100430',
747 'uploader_id': 'deadmau5',
748 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
749 'creator': 'deadmau5',
750 'description': 'md5:12c56784b8032162bb936a5f76d55360',
751 'uploader': 'deadmau5',
752 'title': 'Deadmau5 - Some Chords (HD)',
753 'alt_title': 'Some Chords',
755 'expected_warnings': [
756 'DASH manifest missing',
759 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
761 'url': 'lqQg6PlCWgI',
766 'upload_date': '20150827',
767 'uploader_id': 'olympic',
768 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
769 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
770 'uploader': 'Olympic',
771 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
774 'skip_download': 'requires avconv',
779 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
783 'stretched_ratio': 16 / 9.,
785 'upload_date': '20110310',
786 'uploader_id': 'AllenMeow',
787 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
788 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
790 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
793 # url_encoded_fmt_stream_map is empty string
795 'url': 'qEJwOuvDf7I',
799 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
801 'upload_date': '20150404',
802 'uploader_id': 'spbelect',
803 'uploader': 'Наблюдатели Петербурга',
806 'skip_download': 'requires avconv',
808 'skip': 'This live event has ended.',
810 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
812 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
816 'title': 'md5:7b81415841e02ecd4313668cde88737a',
817 'description': 'md5:116377fd2963b81ec4ce64b542173306',
819 'upload_date': '20150625',
820 'uploader_id': 'dorappi2000',
821 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
822 'uploader': 'dorappi2000',
823 'formats': 'mincount:31',
825 'skip': 'not actual anymore',
827 # DASH manifest with segment_list
829 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
830 'md5': '8ce563a1d667b599d21064e982ab9e31',
834 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
835 'uploader': 'Airtek',
836 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
837 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
838 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
841 'youtube_include_dash_manifest': True,
842 'format': '135', # bestvideo
844 'skip': 'This live event has ended.',
847 # Multifeed videos (multiple cameras), URL is for Main Camera
848 'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
851 'title': 'teamPGP: Rocket League Noob Stream',
852 'description': 'md5:dc7872fb300e143831327f1bae3af010',
858 'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
859 'description': 'md5:dc7872fb300e143831327f1bae3af010',
861 'upload_date': '20150721',
862 'uploader': 'Beer Games Beer',
863 'uploader_id': 'beergamesbeer',
864 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
865 'license': 'Standard YouTube License',
871 'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
872 'description': 'md5:dc7872fb300e143831327f1bae3af010',
874 'upload_date': '20150721',
875 'uploader': 'Beer Games Beer',
876 'uploader_id': 'beergamesbeer',
877 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
878 'license': 'Standard YouTube License',
884 'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
885 'description': 'md5:dc7872fb300e143831327f1bae3af010',
887 'upload_date': '20150721',
888 'uploader': 'Beer Games Beer',
889 'uploader_id': 'beergamesbeer',
890 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
891 'license': 'Standard YouTube License',
897 'title': 'teamPGP: Rocket League Noob Stream (zim)',
898 'description': 'md5:dc7872fb300e143831327f1bae3af010',
900 'upload_date': '20150721',
901 'uploader': 'Beer Games Beer',
902 'uploader_id': 'beergamesbeer',
903 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
904 'license': 'Standard YouTube License',
908 'skip_download': True,
910 'skip': 'This video is not available.',
913 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
914 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
917 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
920 'skip': 'Not multifeed anymore',
923 'url': 'https://vid.plus/FlRa-iH7PGw',
924 'only_matching': True,
927 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
928 'only_matching': True,
931 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
932 # Also tests cut-off URL expansion in video description (see
933 # https://github.com/ytdl-org/youtube-dl/issues/1892,
934 # https://github.com/ytdl-org/youtube-dl/issues/8164)
935 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
939 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
940 'alt_title': 'Dark Walk - Position Music',
941 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
943 'upload_date': '20151119',
944 'uploader_id': 'IronSoulElf',
945 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
946 'uploader': 'IronSoulElf',
947 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
948 'track': 'Dark Walk - Position Music',
949 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
950 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
953 'skip_download': True,
957 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
958 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
959 'only_matching': True,
962 # Video with yt:stretch=17:0
963 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
967 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
968 'description': 'md5:ee18a25c350637c8faff806845bddee9',
969 'upload_date': '20151107',
970 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
971 'uploader': 'CH GAMER DROID',
974 'skip_download': True,
976 'skip': 'This video does not exist.',
979 # Video licensed under Creative Commons
980 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
984 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
985 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
987 'upload_date': '20150127',
988 'uploader_id': 'BerkmanCenter',
989 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
990 'uploader': 'The Berkman Klein Center for Internet & Society',
991 'license': 'Creative Commons Attribution license (reuse allowed)',
994 'skip_download': True,
998 # Channel-like uploader_url
999 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1001 'id': 'eQcmzGIKrzg',
1003 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
1004 'description': 'md5:dda0d780d5a6e120758d1711d062a867',
1006 'upload_date': '20151119',
1007 'uploader': 'Bernie Sanders',
1008 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1009 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
1010 'license': 'Creative Commons Attribution license (reuse allowed)',
1013 'skip_download': True,
1017 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;v=V36LpHqtcDY',
1018 'only_matching': True,
1021 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
1022 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1023 'only_matching': True,
1026 # Rental video preview
1027 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1029 'id': 'uGpuVWrhIzE',
1031 'title': 'Piku - Trailer',
1032 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1033 'upload_date': '20150811',
1034 'uploader': 'FlixMatrix',
1035 'uploader_id': 'FlixMatrixKaravan',
1036 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
1037 'license': 'Standard YouTube License',
1040 'skip_download': True,
1042 'skip': 'This video is not available.',
1045 # YouTube Red video with episode data
1046 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1048 'id': 'iqKdEhx-dD4',
1050 'title': 'Isolation - Mind Field (Ep 1)',
1051 'description': 'md5:46a29be4ceffa65b92d277b93f463c0f',
1053 'upload_date': '20170118',
1054 'uploader': 'Vsauce',
1055 'uploader_id': 'Vsauce',
1056 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
1057 'series': 'Mind Field',
1059 'episode_number': 1,
1062 'skip_download': True,
1064 'expected_warnings': [
1065 'Skipping DASH manifest',
1069 # The following content has been identified by the YouTube community
1070 # as inappropriate or offensive to some audiences.
1071 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1073 'id': '6SJNVb0GnPI',
1075 'title': 'Race Differences in Intelligence',
1076 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1078 'upload_date': '20140124',
1079 'uploader': 'New Century Foundation',
1080 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1081 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1084 'skip_download': True,
1089 'url': '1t24XAntNCY',
1090 'only_matching': True,
1093 # geo restricted to JP
1094 'url': 'sJL6WA-aGkQ',
1095 'only_matching': True,
1098 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
1099 'only_matching': True,
1102 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1103 'only_matching': True,
1107 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1108 'only_matching': True,
1111 # Video with unsupported adaptive stream type formats
1112 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1114 'id': 'Z4Vy8R84T1U',
1116 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1117 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1119 'upload_date': '20130923',
1120 'uploader': 'Amelia Putri Harwita',
1121 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1122 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1123 'formats': 'maxcount:10',
1126 'skip_download': True,
1127 'youtube_include_dash_manifest': False,
1131 # Youtube Music Auto-generated description
1132 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1134 'id': 'MgNrAu2pzNs',
1136 'title': 'Voyeur Girl',
1137 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1138 'upload_date': '20190312',
1139 'uploader': 'Various Artists - Topic',
1140 'uploader_id': 'UCVWKBi1ELZn0QX2CBLSkiyw',
1141 'artist': 'Stephen',
1142 'track': 'Voyeur Girl',
1143 'album': 'it\'s too much love to know my dear',
1144 'release_date': '20190313',
1145 'release_year': 2019,
1148 'skip_download': True,
1152 # Youtube Music Auto-generated description
1153 # Retrieve 'artist' field from 'Artist:' in video description
1154 # when it is present on youtube music video
1155 'url': 'https://www.youtube.com/watch?v=k0jLE7tTwjY',
1157 'id': 'k0jLE7tTwjY',
1159 'title': 'Latch Feat. Sam Smith',
1160 'description': 'md5:3cb1e8101a7c85fcba9b4fb41b951335',
1161 'upload_date': '20150110',
1162 'uploader': 'Various Artists - Topic',
1163 'uploader_id': 'UCNkEcmYdjrH4RqtNgh7BZ9w',
1164 'artist': 'Disclosure',
1165 'track': 'Latch Feat. Sam Smith',
1166 'album': 'Latch Featuring Sam Smith',
1167 'release_date': '20121008',
1168 'release_year': 2012,
1171 'skip_download': True,
1175 # Youtube Music Auto-generated description
1176 # handle multiple artists on youtube music video
1177 'url': 'https://www.youtube.com/watch?v=74qn0eJSjpA',
1179 'id': '74qn0eJSjpA',
1181 'title': 'Eastside',
1182 'description': 'md5:290516bb73dcbfab0dcc4efe6c3de5f2',
1183 'upload_date': '20180710',
1184 'uploader': 'Benny Blanco - Topic',
1185 'uploader_id': 'UCzqz_ksRu_WkIzmivMdIS7A',
1186 'artist': 'benny blanco, Halsey, Khalid',
1187 'track': 'Eastside',
1188 'album': 'Eastside',
1189 'release_date': '20180713',
1190 'release_year': 2018,
1193 'skip_download': True,
1197 # Youtube Music Auto-generated description
1198 # handle youtube music video with release_year and no release_date
1199 'url': 'https://www.youtube.com/watch?v=-hcAI0g-f5M',
1201 'id': '-hcAI0g-f5M',
1203 'title': 'Put It On Me',
1204 'description': 'md5:93c55acc682ae7b0c668f2e34e1c069e',
1205 'upload_date': '20180426',
1206 'uploader': 'Matt Maeson - Topic',
1207 'uploader_id': 'UCnEkIGqtGcQMLk73Kp-Q5LQ',
1208 'artist': 'Matt Maeson',
1209 'track': 'Put It On Me',
1210 'album': 'The Hearse',
1211 'release_date': None,
1212 'release_year': 2018,
1215 'skip_download': True,
1220 def __init__(self, *args, **kwargs):
1221 super(YoutubeIE, self).__init__(*args, **kwargs)
1222 self._player_cache = {}
1224 def report_video_info_webpage_download(self, video_id):
1225 """Report attempt to download video info webpage."""
1226 self.to_screen('%s: Downloading video info webpage' % video_id)
1228 def report_information_extraction(self, video_id):
1229 """Report attempt to extract video information."""
1230 self.to_screen('%s: Extracting video information' % video_id)
1232 def report_unavailable_format(self, video_id, format):
1233 """Report extracted video URL."""
1234 self.to_screen('%s: Format %s not available' % (video_id, format))
1236 def report_rtmp_download(self):
1237 """Indicate the download will use the RTMP protocol."""
1238 self.to_screen('RTMP download detected')
1240 def _signature_cache_id(self, example_sig):
1241 """ Return a string representation of a signature """
1242 return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
    def _extract_signature_function(self, video_id, player_url, example_sig):
        # Build a callable that deciphers scrambled signatures for the player
        # at player_url, caching the result on disk keyed by player type/id
        # and the "shape" of example_sig.
        # NOTE(review): this excerpt is missing a few lines (e.g. the
        # re.match(...) call that binds id_m, and the else: before the final
        # assert); comments describe only the visible code.
            r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|(?:/[a-z]{2,3}_[A-Z]{2})?/base)?\.(?P<ext>[a-z]+)$',
            raise ExtractorError('Cannot identify player %r' % player_url)
        # 'ext' tells JS players apart from legacy Flash (swf) players.
        player_type = id_m.group('ext')
        player_id = id_m.group('id')

        # Read from filesystem cache
        # The cache key includes the signature layout (dot-joined part
        # lengths) because the scramble routine depends on it.
        func_id = '%s_%s_%s' % (
            player_type, player_id, self._signature_cache_id(example_sig))
        # func_id becomes part of a cache file name; it must not contain
        # path separators.
        assert os.path.basename(func_id) == func_id

        cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
        if cache_spec is not None:
            # Cached spec is a list of source indices; apply it directly.
            return lambda s: ''.join(s[i] for i in cache_spec)

            # Verbose mode shows the full player URL instead of type/id.
            'Downloading player %s' % player_url
            if self._downloader.params.get('verbose') else
            'Downloading %s player %s' % (player_type, player_id)
        if player_type == 'js':
            code = self._download_webpage(
                player_url, video_id,
                errnote='Download of %s failed' % player_url)
            res = self._parse_sig_js(code)
        elif player_type == 'swf':
            urlh = self._request_webpage(
                player_url, video_id,
                errnote='Download of %s failed' % player_url)
            # presumably 'code' is read from urlh on a line missing from
            # this excerpt — TODO confirm
            res = self._parse_sig_swf(code)
            assert False, 'Invalid player type %r' % player_type

        # Probe with a string of distinct characters to record where each
        # input position ends up, so the function can be cached as a plain
        # index list.
        test_string = ''.join(map(compat_chr, range(len(example_sig))))
        cache_res = res(test_string)
        cache_spec = [ord(c) for c in cache_res]
        self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
    def _print_sig_code(self, func, example_sig):
        # Print Python source equivalent to the extracted signature function,
        # expressed as string slices/index lookups (--youtube-print-sig-code).
        # NOTE(review): several loop-bookkeeping lines inside gen_sig_code
        # (continue/step updates) are missing from this excerpt; comments
        # cover only the visible code.
        def gen_sig_code(idxs):
            def _genslice(start, end, step):
                # Render s[start:end+step:step], omitting default parts.
                starts = '' if start == 0 else str(start)
                ends = (':%d' % (end + step)) if end + step >= 0 else ':'
                steps = '' if step == 1 else (':%d' % step)
                return 's[%s%s%s]' % (starts, ends, steps)

            # Quelch pyflakes warnings - start will be set when step is set
            start = '(Never used)'
            for i, prev in zip(idxs[1:], idxs[:-1]):
                if step is not None:
                    if i - prev == step:
                    yield _genslice(start, prev, step)
                if i - prev in [-1, 1]:
                yield 's[%d]' % prev
                yield _genslice(start, i, step)

        # Trace func over a distinct-character probe string, then render the
        # resulting index mapping as compact Python.
        test_string = ''.join(map(compat_chr, range(len(example_sig))))
        cache_res = func(test_string)
        cache_spec = [ord(c) for c in cache_res]
        expr_code = ' + '.join(gen_sig_code(cache_spec))
        signature_id_tuple = '(%s)' % (
            ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
        code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
                ' return %s\n') % (signature_id_tuple, expr_code)
        self.to_screen('Extracted signature function:\n' + code)
1329 def _parse_sig_js(self, jscode):
1330 funcname = self._search_regex(
1331 (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1332 r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1333 r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
1335 r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1336 r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
1337 r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1338 r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1339 r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1340 r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1341 r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1342 r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
1343 jscode, 'Initial JS player signature function name', group='sig')
1345 jsi = JSInterpreter(jscode)
1346 initial_function = jsi.extract_function(funcname)
1347 return lambda s: initial_function([s])
1349 def _parse_sig_swf(self, file_contents):
1350 swfi = SWFInterpreter(file_contents)
1351 TARGET_CLASSNAME = 'SignatureDecipher'
1352 searched_class = swfi.extract_class(TARGET_CLASSNAME)
1353 initial_function = swfi.extract_function(searched_class, 'decipher')
1354 return lambda s: initial_function([s])
    def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
        """Turn the encrypted s field into a working signature"""

        if player_url is None:
            raise ExtractorError('Cannot decrypt signature without player_url')

        # Normalize scheme-relative and relative player URLs to absolute https.
        if player_url.startswith('//'):
            player_url = 'https:' + player_url
        elif not re.match(r'https?://', player_url):
            player_url = compat_urlparse.urljoin(
                'https://www.youtube.com', player_url)
        # NOTE(review): the 'try:' opening this block (and the closing paren
        # of the _extract_signature_function call) are on lines missing from
        # this excerpt; the 'except' below pairs with that try.
            # Cache extracted functions per (player URL, signature shape).
            player_id = (player_url, self._signature_cache_id(s))
            if player_id not in self._player_cache:
                func = self._extract_signature_function(
                    video_id, player_url, s
                self._player_cache[player_id] = func
            func = self._player_cache[player_id]
            if self._downloader.params.get('youtube_print_sig_code'):
                self._print_sig_code(func, s)
        except Exception as e:
            # Surface the full traceback; a failure here usually means the
            # player code changed and the extraction patterns need updating.
            tb = traceback.format_exc()
            raise ExtractorError(
                'Signature extraction failed: ' + tb, cause=e)
    def _get_subtitles(self, video_id, webpage):
        # Fetch the list of author-provided subtitle tracks via the legacy
        # timedtext API and map each language code to its download URLs.
        # NOTE(review): a few lines (the opening try:, early returns, and
        # sub_lang_list/sub_formats initializations) are missing from this
        # excerpt; comments describe only the visible code.
            subs_doc = self._download_xml(
                'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
                video_id, note=False)
        except ExtractorError as err:
            # Best effort: warn and carry on without subtitles.
            self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err))

        for track in subs_doc.findall('track'):
            lang = track.attrib['lang_code']
            if lang in sub_lang_list:
            # One entry per supported subtitle format for this language.
            for ext in self._SUBTITLE_FORMATS:
                params = compat_urllib_parse_urlencode({
                    'name': track.attrib['name'].encode('utf-8'),
                sub_formats.append({
                    'url': 'https://www.youtube.com/api/timedtext?' + params,
            sub_lang_list[lang] = sub_formats
        if not sub_lang_list:
            self._downloader.report_warning('video doesn\'t have subtitles')
        return sub_lang_list
    def _get_ytplayer_config(self, video_id, webpage):
        # Extract and parse the ytplayer.config JSON blob from a watch page.
        # Returns the parsed dict, or None when it cannot be found/parsed.
        # NOTE(review): the 'patterns = (' opener and the 'if config:' guard
        # are on lines missing from this excerpt.
        # User data may contain arbitrary character sequences that may affect
        # JSON extraction with regex, e.g. when '};' is contained the second
        # regex won't capture the whole JSON. Yet working around by trying more
        # concrete regex first keeping in mind proper quoted string handling
        # to be implemented in future that will replace this workaround (see
        # https://github.com/ytdl-org/youtube-dl/issues/7468,
        # https://github.com/ytdl-org/youtube-dl/pull/7599)
            r';ytplayer\.config\s*=\s*({.+?});ytplayer',
            r';ytplayer\.config\s*=\s*({.+?});',
        config = self._search_regex(
            patterns, webpage, 'ytplayer.config', default=None)
            # uppercase_escape decodes \UXXXXXXXX-style escapes before parsing.
            return self._parse_json(
                uppercase_escape(config), video_id, fatal=False)
    def _get_automatic_captions(self, video_id, webpage):
        """We need the webpage for getting the captions url, pass it as an
        argument to speed up the process."""
        # NOTE(review): this excerpt has gaps (the opening try:, early
        # returns, dict/list initializations, several qs/dict entries);
        # comments describe only the visible code.
        self.to_screen('%s: Looking for automatic captions' % video_id)
        player_config = self._get_ytplayer_config(video_id, webpage)
        err_msg = 'Couldn\'t find automatic captions for %s' % video_id
        if not player_config:
            self._downloader.report_warning(err_msg)
            args = player_config['args']
            # Legacy flow: 'ttsurl' plus a timestamp identify the caption API.
            caption_url = args.get('ttsurl')
                timestamp = args['timestamp']
                # We get the available subtitles
                list_params = compat_urllib_parse_urlencode({
                list_url = caption_url + '&' + list_params
                caption_list = self._download_xml(list_url, video_id)
                original_lang_node = caption_list.find('track')
                if original_lang_node is None:
                    self._downloader.report_warning('Video doesn\'t have automatic captions')
                original_lang = original_lang_node.attrib['lang_code']
                caption_kind = original_lang_node.attrib.get('kind', '')
                # One set of formats per translation target language.
                for lang_node in caption_list.findall('target'):
                    sub_lang = lang_node.attrib['lang_code']
                    for ext in self._SUBTITLE_FORMATS:
                        params = compat_urllib_parse_urlencode({
                            'lang': original_lang,
                            'kind': caption_kind,
                        sub_formats.append({
                            'url': caption_url + '&' + params,
                    sub_lang_list[sub_lang] = sub_formats
                return sub_lang_list

            def make_captions(sub_url, sub_langs):
                # Derive per-language caption URLs by rewriting the query
                # string of a base caption URL.
                parsed_sub_url = compat_urllib_parse_urlparse(sub_url)
                caption_qs = compat_parse_qs(parsed_sub_url.query)
                for sub_lang in sub_langs:
                    for ext in self._SUBTITLE_FORMATS:
                            'tlang': [sub_lang],
                        sub_url = compat_urlparse.urlunparse(parsed_sub_url._replace(
                            query=compat_urllib_parse_urlencode(caption_qs, True)))
                        sub_formats.append({
                    captions[sub_lang] = sub_formats

            # New captions format as of 22.06.2017
            player_response = args.get('player_response')
            if player_response and isinstance(player_response, compat_str):
                player_response = self._parse_json(
                    player_response, video_id, fatal=False)
                renderer = player_response['captions']['playerCaptionsTracklistRenderer']
                base_url = renderer['captionTracks'][0]['baseUrl']
                for lang in renderer['translationLanguages']:
                    lang_code = lang.get('languageCode')
                        sub_lang_list.append(lang_code)
                return make_captions(base_url, sub_lang_list)

            # Some videos don't provide ttsurl but rather caption_tracks and
            # caption_translation_languages (e.g. 20LmZk1hakA)
            # Not used anymore as of 22.06.2017
            caption_tracks = args['caption_tracks']
            caption_translation_languages = args['caption_translation_languages']
            caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
            for lang in caption_translation_languages.split(','):
                lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang))
                sub_lang = lang_qs.get('lc', [None])[0]
                    sub_lang_list.append(sub_lang)
            return make_captions(caption_url, sub_lang_list)
        # An extractor error can be raised by the download process if there are
        # no automatic captions but there are subtitles
        except (KeyError, IndexError, ExtractorError):
            self._downloader.report_warning(err_msg)
    def _mark_watched(self, video_id, video_info, player_response):
        # Ping YouTube's playback-tracking URL so the video appears in the
        # user's watch history. Best effort: all failures are non-fatal.
        # NOTE(review): a few lines are missing from this excerpt (the first
        # try_get argument, the early return body, and the qs.update block).
        playback_url = url_or_none(try_get(
            # Prefer the new player_response field, with a fallback to the
            # legacy video_info key.
            lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']) or try_get(
            video_info, lambda x: x['videostats_playback_base_url'][0]))
        if not playback_url:
        parsed_playback_url = compat_urlparse.urlparse(playback_url)
        qs = compat_urlparse.parse_qs(parsed_playback_url.query)

        # cpn generation algorithm is reverse engineered from base.js.
        # In fact it works even with dummy cpn.
        CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
        # 16 characters from a 64-symbol alphabet; '& 63' folds randint's
        # 0..256 range into a valid index.
        cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))

        playback_url = compat_urlparse.urlunparse(
            parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))

        self._download_webpage(
            playback_url, video_id, 'Marking watched',
            'Unable to mark watched', fatal=False)
    def _extract_urls(webpage):
        # Collect all YouTube embed URLs/ids found in the given webpage.
        # NOTE(review): lines are missing from this excerpt (the 'entries'
        # list-comprehension opener and part of the iframe regex); comments
        # are kept outside the triple-quoted regex literals.
        # Embedded YouTube player
            unescapeHTML(mobj.group('url'))
            for mobj in re.finditer(r'''(?x)
                (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
                (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        # lazyYT YouTube embed
        entries.extend(list(map(
            re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))
        # Wordpress "YouTube Video Importer" plugin
        matches = re.findall(r'''(?x)<div[^>]+
            class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
            data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
        # The video id is the last captured group of each match.
        entries.extend(m[-1] for m in matches)
1594 def _extract_url(webpage):
1595 urls = YoutubeIE._extract_urls(webpage)
1596 return urls[0] if urls else None
    def extract_id(cls, url):
        # Return the video id captured by _VALID_URL, raising ExtractorError
        # for non-matching URLs.
        # NOTE(review): the 'if mobj is None:' guard before the raise and the
        # final 'return video_id' appear to be on lines missing from this
        # excerpt.
        mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
            raise ExtractorError('Invalid URL: %s' % url)
        # Group 2 of _VALID_URL holds the video id.
        video_id = mobj.group(2)
    def _extract_chapters(description, duration):
        # Parse chapter markers (seekTo anchor links with mm:ss timestamps)
        # out of the HTML video description; builds dicts with
        # start_time/end_time/title.
        # NOTE(review): several control-flow lines (continue/break bodies,
        # clamping, list initialization and append opener) are missing from
        # this excerpt; comments describe only the visible code.
        chapter_lines = re.findall(
            r'(?:^|<br\s*/>)([^<]*<a[^>]+onclick=["\']yt\.www\.watch\.player\.seekTo[^>]+>(\d{1,2}:\d{1,2}(?::\d{1,2})?)</a>[^>]*)(?=$|<br\s*/>)',
        if not chapter_lines:
        # next_num starts at 1 so chapter_lines[next_num] is the following
        # chapter of the current one.
        for next_num, (chapter_line, time_point) in enumerate(
                chapter_lines, start=1):
            start_time = parse_duration(time_point)
            if start_time is None:
            if start_time > duration:
            # The last chapter ends at the video duration; otherwise at the
            # next chapter's start timestamp.
            end_time = (duration if next_num == len(chapter_lines)
                        else parse_duration(chapter_lines[next_num][1]))
            if end_time is None:
            if end_time > duration:
            if start_time > end_time:
            # Strip the anchor markup and surrounding dashes/whitespace,
            # then collapse runs of whitespace, to obtain the title.
            chapter_title = re.sub(
                r'<a[^>]+>[^<]+</a>', '', chapter_line).strip(' \t-')
            chapter_title = re.sub(r'\s+', ' ', chapter_title)
                'start_time': start_time,
                'end_time': end_time,
                'title': chapter_title,
1641 def _real_extract(self, url):
1642 url, smuggled_data = unsmuggle_url(url, {})
1645 'http' if self._downloader.params.get('prefer_insecure', False)
1650 parsed_url = compat_urllib_parse_urlparse(url)
1651 for component in [parsed_url.fragment, parsed_url.query]:
1652 query = compat_parse_qs(component)
1653 if start_time is None and 't' in query:
1654 start_time = parse_duration(query['t'][0])
1655 if start_time is None and 'start' in query:
1656 start_time = parse_duration(query['start'][0])
1657 if end_time is None and 'end' in query:
1658 end_time = parse_duration(query['end'][0])
1660 # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1661 mobj = re.search(self._NEXT_URL_RE, url)
1663 url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
1664 video_id = self.extract_id(url)
1667 url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
1668 video_webpage = self._download_webpage(url, video_id)
1670 # Attempt to extract SWF player URL
1671 mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
1672 if mobj is not None:
1673 player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1679 def add_dash_mpd(video_info):
1680 dash_mpd = video_info.get('dashmpd')
1681 if dash_mpd and dash_mpd[0] not in dash_mpds:
1682 dash_mpds.append(dash_mpd[0])
1684 def add_dash_mpd_pr(pl_response):
1685 dash_mpd = url_or_none(try_get(
1686 pl_response, lambda x: x['streamingData']['dashManifestUrl'],
1688 if dash_mpd and dash_mpd not in dash_mpds:
1689 dash_mpds.append(dash_mpd)
1694 def extract_view_count(v_info):
1695 return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
1697 def extract_token(v_info):
1698 return dict_get(v_info, ('account_playback_token', 'accountPlaybackToken', 'token'))
1700 def extract_player_response(player_response, video_id):
1701 pl_response = str_or_none(player_response)
1704 pl_response = self._parse_json(pl_response, video_id, fatal=False)
1705 if isinstance(pl_response, dict):
1706 add_dash_mpd_pr(pl_response)
1709 player_response = {}
1712 embed_webpage = None
1713 if re.search(r'player-age-gate-content">', video_webpage) is not None:
1715 # We simulate the access to the video from www.youtube.com/v/{video_id}
1716 # this can be viewed without login into Youtube
1717 url = proto + '://www.youtube.com/embed/%s' % video_id
1718 embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
1719 data = compat_urllib_parse_urlencode({
1720 'video_id': video_id,
1721 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1722 'sts': self._search_regex(
1723 r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
1725 video_info_url = proto + '://www.youtube.com/get_video_info?' + data
1726 video_info_webpage = self._download_webpage(
1727 video_info_url, video_id,
1728 note='Refetching age-gated info webpage',
1729 errnote='unable to download video info webpage')
1730 video_info = compat_parse_qs(video_info_webpage)
1731 pl_response = video_info.get('player_response', [None])[0]
1732 player_response = extract_player_response(pl_response, video_id)
1733 add_dash_mpd(video_info)
1734 view_count = extract_view_count(video_info)
1739 # Try looking directly into the video webpage
1740 ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
1742 args = ytplayer_config['args']
1743 if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
1744 # Convert to the same format returned by compat_parse_qs
1745 video_info = dict((k, [v]) for k, v in args.items())
1746 add_dash_mpd(video_info)
1747 # Rental video is not rented but preview is available (e.g.
1748 # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
1749 # https://github.com/ytdl-org/youtube-dl/issues/10532)
1750 if not video_info and args.get('ypc_vid'):
1751 return self.url_result(
1752 args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
1753 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1755 sts = ytplayer_config.get('sts')
1756 if not player_response:
1757 player_response = extract_player_response(args.get('player_response'), video_id)
1758 if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
1759 add_dash_mpd_pr(player_response)
1760 # We also try looking in get_video_info since it may contain different dashmpd
1761 # URL that points to a DASH manifest with possibly different itag set (some itags
1762 # are missing from DASH manifest pointed by webpage's dashmpd, some - from DASH
1763 # manifest pointed by get_video_info's dashmpd).
1764 # The general idea is to take a union of itags of both DASH manifests (for example
1765 # video with such 'manifest behavior' see https://github.com/ytdl-org/youtube-dl/issues/6093)
1766 self.report_video_info_webpage_download(video_id)
1767 for el in ('embedded', 'detailpage', 'vevo', ''):
1769 'video_id': video_id,
1779 video_info_webpage = self._download_webpage(
1780 '%s://www.youtube.com/get_video_info' % proto,
1781 video_id, note=False,
1782 errnote='unable to download video info webpage',
1783 fatal=False, query=query)
1784 if not video_info_webpage:
1786 get_video_info = compat_parse_qs(video_info_webpage)
1787 if not player_response:
1788 pl_response = get_video_info.get('player_response', [None])[0]
1789 player_response = extract_player_response(pl_response, video_id)
1790 add_dash_mpd(get_video_info)
1791 if view_count is None:
1792 view_count = extract_view_count(get_video_info)
1794 video_info = get_video_info
1795 get_token = extract_token(get_video_info)
1797 # Different get_video_info requests may report different results, e.g.
1798 # some may report video unavailability, but some may serve it without
1799 # any complaint (see https://github.com/ytdl-org/youtube-dl/issues/7362,
1800 # the original webpage as well as el=info and el=embedded get_video_info
1801 # requests report video unavailability due to geo restriction while
1802 # el=detailpage succeeds and returns valid data). This is probably
1803 # due to YouTube measures against IP ranges of hosting providers.
1804 # Working around by preferring the first succeeded video_info containing
1805 # the token if no such video_info yet was found.
1806 token = extract_token(video_info)
1808 video_info = get_video_info
1811 def extract_unavailable_message():
1813 for tag, kind in (('h1', 'message'), ('div', 'submessage')):
1814 msg = self._html_search_regex(
1815 r'(?s)<{tag}[^>]+id=["\']unavailable-{kind}["\'][^>]*>(.+?)</{tag}>'.format(tag=tag, kind=kind),
1816 video_webpage, 'unavailable %s' % kind, default=None)
1818 messages.append(msg)
1820 return '\n'.join(messages)
1823 unavailable_message = extract_unavailable_message()
1824 if not unavailable_message:
1825 unavailable_message = 'Unable to extract video data'
1826 raise ExtractorError(
1827 'YouTube said: %s' % unavailable_message, expected=True, video_id=video_id)
1829 video_details = try_get(
1830 player_response, lambda x: x['videoDetails'], dict) or {}
1832 video_title = video_info.get('title', [None])[0] or video_details.get('title')
1834 self._downloader.report_warning('Unable to extract video title')
1837 description_original = video_description = get_element_by_id("eow-description", video_webpage)
1838 if video_description:
1841 redir_url = compat_urlparse.urljoin(url, m.group(1))
1842 parsed_redir_url = compat_urllib_parse_urlparse(redir_url)
1843 if re.search(r'^(?:www\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)$', parsed_redir_url.netloc) and parsed_redir_url.path == '/redirect':
1844 qs = compat_parse_qs(parsed_redir_url.query)
1850 description_original = video_description = re.sub(r'''(?x)
1852 (?:[a-zA-Z-]+="[^"]*"\s+)*?
1853 (?:title|href)="([^"]+)"\s+
1854 (?:[a-zA-Z-]+="[^"]*"\s+)*?
1858 ''', replace_url, video_description)
1859 video_description = clean_html(video_description)
1861 video_description = self._html_search_meta('description', video_webpage) or video_details.get('shortDescription')
1863 if not smuggled_data.get('force_singlefeed', False):
1864 if not self._downloader.params.get('noplaylist'):
1865 multifeed_metadata_list = try_get(
1867 lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
1868 compat_str) or try_get(
1869 video_info, lambda x: x['multifeed_metadata_list'][0], compat_str)
1870 if multifeed_metadata_list:
1873 for feed in multifeed_metadata_list.split(','):
1874 # Unquote should take place before split on comma (,) since textual
1875 # fields may contain comma as well (see
1876 # https://github.com/ytdl-org/youtube-dl/issues/8536)
1877 feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
1879 '_type': 'url_transparent',
1880 'ie_key': 'Youtube',
1882 '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
1883 {'force_singlefeed': True}),
1884 'title': '%s (%s)' % (video_title, feed_data['title'][0]),
1886 feed_ids.append(feed_data['id'][0])
1888 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
1889 % (', '.join(feed_ids), video_id))
1890 return self.playlist_result(entries, video_id, video_title, video_description)
1892 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
1894 if view_count is None:
1895 view_count = extract_view_count(video_info)
1896 if view_count is None and video_details:
1897 view_count = int_or_none(video_details.get('viewCount'))
1900 is_live = bool_or_none(video_details.get('isLive'))
1902 # Check for "rental" videos
1903 if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
1904 raise ExtractorError('"rental" videos not supported. See https://github.com/ytdl-org/youtube-dl/issues/359 for more information.', expected=True)
1906 def _extract_filesize(media_url):
1907 return int_or_none(self._search_regex(
1908 r'\bclen[=/](\d+)', media_url, 'filesize', default=None))
1910 if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1911 self.report_rtmp_download()
1913 'format_id': '_rtmp',
1915 'url': video_info['conn'][0],
1916 'player_url': player_url,
1918 elif not is_live and (len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1):
1919 encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
1920 if 'rtmpe%3Dyes' in encoded_url_map:
1921 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/ytdl-org/youtube-dl/issues/343 for more information.', expected=True)
1923 fmt_list = video_info.get('fmt_list', [''])[0]
1925 for fmt in fmt_list.split(','):
1926 spec = fmt.split('/')
1928 width_height = spec[1].split('x')
1929 if len(width_height) == 2:
1930 formats_spec[spec[0]] = {
1931 'resolution': spec[1],
1932 'width': int_or_none(width_height[0]),
1933 'height': int_or_none(width_height[1]),
1935 q = qualities(['small', 'medium', 'hd720'])
1936 streaming_formats = try_get(player_response, lambda x: x['streamingData']['formats'], list)
1937 if streaming_formats:
1938 for fmt in streaming_formats:
1939 itag = str_or_none(fmt.get('itag'))
1942 quality = fmt.get('quality')
1943 quality_label = fmt.get('qualityLabel') or quality
1944 formats_spec[itag] = {
1945 'asr': int_or_none(fmt.get('audioSampleRate')),
1946 'filesize': int_or_none(fmt.get('contentLength')),
1947 'format_note': quality_label,
1948 'fps': int_or_none(fmt.get('fps')),
1949 'height': int_or_none(fmt.get('height')),
1950 'quality': q(quality),
1951 # bitrate for itag 43 is always 2147483647
1952 'tbr': float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000) if itag != '43' else None,
1953 'width': int_or_none(fmt.get('width')),
1956 for url_data_str in encoded_url_map.split(','):
1957 url_data = compat_parse_qs(url_data_str)
1958 if 'itag' not in url_data or 'url' not in url_data or url_data.get('drm_families'):
1960 stream_type = int_or_none(try_get(url_data, lambda x: x['stream_type'][0]))
1961 # Unsupported FORMAT_STREAM_TYPE_OTF
1962 if stream_type == 3:
1964 format_id = url_data['itag'][0]
1965 url = url_data['url'][0]
1967 if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
1968 ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
1969 jsplayer_url_json = self._search_regex(
1971 embed_webpage if age_gate else video_webpage,
1972 'JS player URL (1)', default=None)
1973 if not jsplayer_url_json and not age_gate:
1974 # We need the embed website after all
1975 if embed_webpage is None:
1976 embed_url = proto + '://www.youtube.com/embed/%s' % video_id
1977 embed_webpage = self._download_webpage(
1978 embed_url, video_id, 'Downloading embed webpage')
1979 jsplayer_url_json = self._search_regex(
1980 ASSETS_RE, embed_webpage, 'JS player URL')
1982 player_url = json.loads(jsplayer_url_json)
1983 if player_url is None:
1984 player_url_json = self._search_regex(
1985 r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
1986 video_webpage, 'age gate player URL')
1987 player_url = json.loads(player_url_json)
1989 if 'sig' in url_data:
1990 url += '&signature=' + url_data['sig'][0]
1991 elif 's' in url_data:
1992 encrypted_sig = url_data['s'][0]
1994 if self._downloader.params.get('verbose'):
1995 if player_url is None:
1996 player_version = 'unknown'
1997 player_desc = 'unknown'
1999 if player_url.endswith('swf'):
2000 player_version = self._search_regex(
2001 r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
2002 'flash player', fatal=False)
2003 player_desc = 'flash player %s' % player_version
2005 player_version = self._search_regex(
2006 [r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js',
2007 r'(?:www|player(?:_ias)?)-([^/]+)(?:/[a-z]{2,3}_[A-Z]{2})?/base\.js'],
2009 'html5 player', fatal=False)
2010 player_desc = 'html5 player %s' % player_version
2012 parts_sizes = self._signature_cache_id(encrypted_sig)
2013 self.to_screen('{%s} signature length %s, %s' %
2014 (format_id, parts_sizes, player_desc))
2016 signature = self._decrypt_signature(
2017 encrypted_sig, video_id, player_url, age_gate)
2018 sp = try_get(url_data, lambda x: x['sp'][0], compat_str) or 'signature'
2019 url += '&%s=%s' % (sp, signature)
2020 if 'ratebypass' not in url:
2021 url += '&ratebypass=yes'
2024 'format_id': format_id,
2026 'player_url': player_url,
2028 if format_id in self._formats:
2029 dct.update(self._formats[format_id])
2030 if format_id in formats_spec:
2031 dct.update(formats_spec[format_id])
2033 # Some itags are not included in DASH manifest thus corresponding formats will
2034 # lack metadata (see https://github.com/ytdl-org/youtube-dl/pull/5993).
2035 # Trying to extract metadata from url_encoded_fmt_stream_map entry.
2036 mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
2037 width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
2039 filesize = int_or_none(url_data.get(
2040 'clen', [None])[0]) or _extract_filesize(url)
2042 quality = url_data.get('quality', [None])[0]
2045 'filesize': filesize,
2046 'tbr': float_or_none(url_data.get('bitrate', [None])[0], 1000),
2049 'fps': int_or_none(url_data.get('fps', [None])[0]),
2050 'format_note': url_data.get('quality_label', [None])[0] or quality,
2051 'quality': q(quality),
2053 for key, value in more_fields.items():
2056 type_ = url_data.get('type', [None])[0]
2058 type_split = type_.split(';')
2059 kind_ext = type_split[0].split('/')
2060 if len(kind_ext) == 2:
2062 dct['ext'] = mimetype2ext(type_split[0])
2063 if kind in ('audio', 'video'):
2065 for mobj in re.finditer(
2066 r'(?P<key>[a-zA-Z_-]+)=(?P<quote>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):
2067 if mobj.group('key') == 'codecs':
2068 codecs = mobj.group('val')
2071 dct.update(parse_codecs(codecs))
2072 if dct.get('acodec') == 'none' or dct.get('vcodec') == 'none':
2073 dct['downloader_options'] = {
2074 # Youtube throttles chunks >~10M
2075 'http_chunk_size': 10485760,
2080 url_or_none(try_get(
2082 lambda x: x['streamingData']['hlsManifestUrl'],
2084 or url_or_none(try_get(
2085 video_info, lambda x: x['hlsvp'][0], compat_str)))
2088 m3u8_formats = self._extract_m3u8_formats(
2089 manifest_url, video_id, 'mp4', fatal=False)
2090 for a_format in m3u8_formats:
2091 itag = self._search_regex(
2092 r'/itag/(\d+)/', a_format['url'], 'itag', default=None)
2094 a_format['format_id'] = itag
2095 if itag in self._formats:
2096 dct = self._formats[itag].copy()
2097 dct.update(a_format)
2099 a_format['player_url'] = player_url
2100 # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
2101 a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
2102 formats.append(a_format)
2104 error_message = extract_unavailable_message()
2105 if not error_message:
2106 error_message = clean_html(try_get(
2107 player_response, lambda x: x['playabilityStatus']['reason'],
2109 if not error_message:
2110 error_message = clean_html(
2111 try_get(video_info, lambda x: x['reason'][0], compat_str))
2113 raise ExtractorError(error_message, expected=True)
2114 raise ExtractorError('no conn, hlsvp, hlsManifestUrl or url_encoded_fmt_stream_map information found in video info')
2117 video_uploader = try_get(
2118 video_info, lambda x: x['author'][0],
2119 compat_str) or str_or_none(video_details.get('author'))
2121 video_uploader = compat_urllib_parse_unquote_plus(video_uploader)
2123 self._downloader.report_warning('unable to extract uploader name')
2126 video_uploader_id = None
2127 video_uploader_url = None
2129 r'<link itemprop="url" href="(?P<uploader_url>https?://www\.youtube\.com/(?:user|channel)/(?P<uploader_id>[^"]+))">',
2131 if mobj is not None:
2132 video_uploader_id = mobj.group('uploader_id')
2133 video_uploader_url = mobj.group('uploader_url')
2135 self._downloader.report_warning('unable to extract uploader nickname')
2138 str_or_none(video_details.get('channelId'))
2139 or self._html_search_meta(
2140 'channelId', video_webpage, 'channel id', default=None)
2141 or self._search_regex(
2142 r'data-channel-external-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
2143 video_webpage, 'channel id', default=None, group='id'))
2144 channel_url = 'http://www.youtube.com/channel/%s' % channel_id if channel_id else None
2147 # We try first to get a high quality image:
2148 m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
2149 video_webpage, re.DOTALL)
2150 if m_thumb is not None:
2151 video_thumbnail = m_thumb.group(1)
2152 elif 'thumbnail_url' not in video_info:
2153 self._downloader.report_warning('unable to extract video thumbnail')
2154 video_thumbnail = None
2155 else: # don't panic if we can't find it
2156 video_thumbnail = compat_urllib_parse_unquote_plus(video_info['thumbnail_url'][0])
2159 upload_date = self._html_search_meta(
2160 'datePublished', video_webpage, 'upload date', default=None)
2162 upload_date = self._search_regex(
2163 [r'(?s)id="eow-date.*?>(.*?)</span>',
2164 r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'],
2165 video_webpage, 'upload date', default=None)
2166 upload_date = unified_strdate(upload_date)
2168 video_license = self._html_search_regex(
2169 r'<h4[^>]+class="title"[^>]*>\s*License\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li',
2170 video_webpage, 'license', default=None)
2172 m_music = re.search(
2174 <h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*
2182 \bhref=["\']/red[^>]*>| # drop possible
2183 >\s*Listen ad-free with YouTube Red # YouTube Red ad
2190 video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
2191 video_creator = clean_html(m_music.group('creator'))
2193 video_alt_title = video_creator = None
2195 def extract_meta(field):
2196 return self._html_search_regex(
2197 r'<h4[^>]+class="title"[^>]*>\s*%s\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li>\s*' % field,
2198 video_webpage, field, default=None)
2200 track = extract_meta('Song')
2201 artist = extract_meta('Artist')
2202 album = extract_meta('Album')
2204 # Youtube Music Auto-generated description
2205 release_date = release_year = None
2206 if video_description:
2207 mobj = re.search(r'(?s)Provided to YouTube by [^\n]+\n+(?P<track>[^·]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?', video_description)
2210 track = mobj.group('track').strip()
2212 artist = mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·'))
2214 album = mobj.group('album'.strip())
2215 release_year = mobj.group('release_year')
2216 release_date = mobj.group('release_date')
2218 release_date = release_date.replace('-', '')
2219 if not release_year:
2220 release_year = int(release_date[:4])
2222 release_year = int(release_year)
2224 m_episode = re.search(
2225 r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
2228 series = unescapeHTML(m_episode.group('series'))
2229 season_number = int(m_episode.group('season'))
2230 episode_number = int(m_episode.group('episode'))
2232 series = season_number = episode_number = None
2234 m_cat_container = self._search_regex(
2235 r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
2236 video_webpage, 'categories', default=None)
2238 category = self._html_search_regex(
2239 r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
2241 video_categories = None if category is None else [category]
2243 video_categories = None
2246 unescapeHTML(m.group('content'))
2247 for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
2249 def _extract_count(count_name):
2250 return str_to_int(self._search_regex(
2251 r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>'
2252 % re.escape(count_name),
2253 video_webpage, count_name, default=None))
2255 like_count = _extract_count('like')
2256 dislike_count = _extract_count('dislike')
2258 if view_count is None:
2259 view_count = str_to_int(self._search_regex(
2260 r'<[^>]+class=["\']watch-view-count[^>]+>\s*([\d,\s]+)', video_webpage,
2261 'view count', default=None))
2264 float_or_none(video_details.get('averageRating'))
2265 or try_get(video_info, lambda x: float_or_none(x['avg_rating'][0])))
2268 video_subtitles = self.extract_subtitles(video_id, video_webpage)
2269 automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
2271 video_duration = try_get(
2272 video_info, lambda x: int_or_none(x['length_seconds'][0]))
2273 if not video_duration:
2274 video_duration = int_or_none(video_details.get('lengthSeconds'))
2275 if not video_duration:
2276 video_duration = parse_duration(self._html_search_meta(
2277 'duration', video_webpage, 'video duration'))
2280 video_annotations = None
2281 if self._downloader.params.get('writeannotations', False):
2282 xsrf_token = self._search_regex(
2283 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>[A-Za-z0-9+/=]+)\2',
2284 video_webpage, 'xsrf token', group='xsrf_token', fatal=False)
2285 invideo_url = try_get(
2286 player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
2287 if xsrf_token and invideo_url:
2288 xsrf_field_name = self._search_regex(
2289 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
2290 video_webpage, 'xsrf field name',
2291 group='xsrf_field_name', default='session_token')
2292 video_annotations = self._download_webpage(
2293 self._proto_relative_url(invideo_url),
2294 video_id, note='Downloading annotations',
2295 errnote='Unable to download video annotations', fatal=False,
2296 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
2298 chapters = self._extract_chapters(description_original, video_duration)
2300 # Look for the DASH manifest
2301 if self._downloader.params.get('youtube_include_dash_manifest', True):
2302 dash_mpd_fatal = True
2303 for mpd_url in dash_mpds:
2306 def decrypt_sig(mobj):
2308 dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
2309 return '/signature/%s' % dec_s
2311 mpd_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, mpd_url)
2313 for df in self._extract_mpd_formats(
2314 mpd_url, video_id, fatal=dash_mpd_fatal,
2315 formats_dict=self._formats):
2316 if not df.get('filesize'):
2317 df['filesize'] = _extract_filesize(df['url'])
2318 # Do not overwrite DASH format found in some previous DASH manifest
2319 if df['format_id'] not in dash_formats:
2320 dash_formats[df['format_id']] = df
2321 # Additional DASH manifests may end up in HTTP Error 403 therefore
2322 # allow them to fail without bug report message if we already have
2323 # some DASH manifest succeeded. This is temporary workaround to reduce
2324 # burst of bug reports until we figure out the reason and whether it
2325 # can be fixed at all.
2326 dash_mpd_fatal = False
2327 except (ExtractorError, KeyError) as e:
2328 self.report_warning(
2329 'Skipping DASH manifest: %r' % e, video_id)
2331 # Remove the formats we found through non-DASH, they
2332 # contain less info and it can be wrong, because we use
2333 # fixed values (for example the resolution). See
2334 # https://github.com/ytdl-org/youtube-dl/issues/5774 for an
2336 formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
2337 formats.extend(dash_formats.values())
2339 # Check for malformed aspect ratio
2340 stretched_m = re.search(
2341 r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
2344 w = float(stretched_m.group('w'))
2345 h = float(stretched_m.group('h'))
2346 # yt:stretch may hold invalid ratio data (e.g. for Q39EVAstoRM ratio is 17:0).
2347 # We will only process correct ratios.
2351 if f.get('vcodec') != 'none':
2352 f['stretched_ratio'] = ratio
2355 token = extract_token(video_info)
2357 if 'reason' in video_info:
2358 if 'The uploader has not made this video available in your country.' in video_info['reason']:
2359 regions_allowed = self._html_search_meta(
2360 'regionsAllowed', video_webpage, default=None)
2361 countries = regions_allowed.split(',') if regions_allowed else None
2362 self.raise_geo_restricted(
2363 msg=video_info['reason'][0], countries=countries)
2364 reason = video_info['reason'][0]
2365 if 'Invalid parameters' in reason:
2366 unavailable_message = extract_unavailable_message()
2367 if unavailable_message:
2368 reason = unavailable_message
2369 raise ExtractorError(
2370 'YouTube said: %s' % reason,
2371 expected=True, video_id=video_id)
2373 raise ExtractorError(
2374 '"token" parameter not in video info for unknown reason',
2377 if not formats and (video_info.get('license_info') or try_get(player_response, lambda x: x['streamingData']['licenseInfos'])):
2378 raise ExtractorError('This video is DRM protected.', expected=True)
2380 self._sort_formats(formats)
2382 self.mark_watched(video_id, video_info, player_response)
2386 'uploader': video_uploader,
2387 'uploader_id': video_uploader_id,
2388 'uploader_url': video_uploader_url,
2389 'channel_id': channel_id,
2390 'channel_url': channel_url,
2391 'upload_date': upload_date,
2392 'license': video_license,
2393 'creator': video_creator or artist,
2394 'title': video_title,
2395 'alt_title': video_alt_title or track,
2396 'thumbnail': video_thumbnail,
2397 'description': video_description,
2398 'categories': video_categories,
2400 'subtitles': video_subtitles,
2401 'automatic_captions': automatic_captions,
2402 'duration': video_duration,
2403 'age_limit': 18 if age_gate else 0,
2404 'annotations': video_annotations,
2405 'chapters': chapters,
2406 'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
2407 'view_count': view_count,
2408 'like_count': like_count,
2409 'dislike_count': dislike_count,
2410 'average_rating': average_rating,
2413 'start_time': start_time,
2414 'end_time': end_time,
2416 'season_number': season_number,
2417 'episode_number': episode_number,
2421 'release_date': release_date,
2422 'release_year': release_year,
# Extractor for regular YouTube playlists (PL/UU/OLAK5uy_ etc. ids), paging
# through the playlist web pages via YoutubePlaylistBaseInfoExtractor.
2426 class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
2427 IE_DESC = 'YouTube.com playlists'
# Verbose (?x) regex matching playlist pages, embeds, bare playlist ids and
# youtu.be watch URLs carrying a list= parameter.
# NOTE(review): several alternation lines of this pattern are elided in this
# listing (internal lines 2429-2437, 2440-2441, 2443-2444, 2447-2451).
2428 _VALID_URL = r"""(?x)(?:
2438 (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/(?:videoseries|[0-9A-Za-z_-]{11}))
2439 \? (?:.*?[&;])*? (?:p|a|list)=
2442 youtu\.be/[0-9A-Za-z_-]{11}\?.*?\blist=
2445 (?:PL|LL|EC|UU|FL|RD|UL|TL|OLAK5uy_)?[0-9A-Za-z-_]{10,}
2446 # Top tracks, they can also include dots
2452 )""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
# URL template used to download the first playlist page.
2453 _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
# Anchor-scraping regex template; %s is filled with the video-id pattern.
2454 _VIDEO_RE_TPL = r'href="\s*/watch\?v=%s(?:&(?:[^"]*?index=(?P<index>\d+))?(?:[^>]+>(?P<title>[^<]+))?)?'
2455 _VIDEO_RE = _VIDEO_RE_TPL % r'(?P<id>[0-9A-Za-z_-]{11})'
2456 IE_NAME = 'youtube:playlist'
2458 'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
2460 'title': 'ytdl test PL',
2461 'id': 'PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
2463 'playlist_count': 3,
2465 'url': 'https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
2467 'id': 'PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
2468 'title': 'YDL_Empty_List',
2470 'playlist_count': 0,
2471 'skip': 'This playlist is private',
2473 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
2474 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2476 'title': '29C3: Not my department',
2477 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2478 'uploader': 'Christiaan008',
2479 'uploader_id': 'ChRiStIaAn008',
2481 'playlist_count': 95,
2483 'note': 'issue #673',
2484 'url': 'PLBB231211A4F62143',
2486 'title': '[OLD]Team Fortress 2 (Class-based LP)',
2487 'id': 'PLBB231211A4F62143',
2488 'uploader': 'Wickydoo',
2489 'uploader_id': 'Wickydoo',
2491 'playlist_mincount': 26,
2493 'note': 'Large playlist',
2494 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
2496 'title': 'Uploads from Cauchemar',
2497 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
2498 'uploader': 'Cauchemar',
2499 'uploader_id': 'Cauchemar89',
2501 'playlist_mincount': 799,
2503 'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
2505 'title': 'YDL_safe_search',
2506 'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
2508 'playlist_count': 2,
2509 'skip': 'This playlist is private',
2512 'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
2513 'playlist_count': 4,
2516 'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
2517 'uploader': 'milan',
2518 'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
2521 'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
2522 'playlist_mincount': 485,
2524 'title': '2018 Chinese New Singles (11/6 updated)',
2525 'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
2527 'uploader_id': 'sdragonfang',
2530 'note': 'Embedded SWF player',
2531 'url': 'https://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
2532 'playlist_count': 4,
2535 'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ',
2537 'skip': 'This playlist does not exist',
2539 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
2540 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
2542 'title': 'Uploads from Interstellar Movie',
2543 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
2544 'uploader': 'Interstellar Movie',
2545 'uploader_id': 'InterstellarMovie1',
2547 'playlist_mincount': 21,
2549 # Playlist URL that does not actually serve a playlist
2550 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
2552 'id': 'FqZTN594JQw',
2554 'title': "Smiley's People 01 detective, Adventure Series, Action",
2555 'uploader': 'STREEM',
2556 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
2557 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
2558 'upload_date': '20150526',
2559 'license': 'Standard YouTube License',
2560 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
2561 'categories': ['People & Blogs'],
2565 'dislike_count': int,
2568 'skip_download': True,
2570 'skip': 'This video is not available.',
2571 'add_ie': [YoutubeIE.ie_key()],
2573 'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
2575 'id': 'yeWKywCrFtk',
2577 'title': 'Small Scale Baler and Braiding Rugs',
2578 'uploader': 'Backus-Page House Museum',
2579 'uploader_id': 'backuspagemuseum',
2580 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
2581 'upload_date': '20161008',
2582 'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
2583 'categories': ['Nonprofits & Activism'],
2586 'dislike_count': int,
2590 'skip_download': True,
2593 # https://github.com/ytdl-org/youtube-dl/issues/21844
2594 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2596 'title': 'Data Analysis with Dr Mike Pound',
2597 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2598 'uploader_id': 'Computerphile',
2599 'uploader': 'Computerphile',
2601 'playlist_mincount': 11,
2603 'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
2604 'only_matching': True,
2606 'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
2607 'only_matching': True,
2609 # music album playlist
2610 'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
2611 'only_matching': True,
2613 'url': 'https://invidio.us/playlist?list=PLDIoUOhQQPlXr63I_vwF9GD8sAKh77dWU',
2614 'only_matching': True,
2617 def _real_initialize(self):
2620 def extract_videos_from_page(self, page):
# Scrape (video_id, title) pairs from a playlist/channel page.  Newer markup
# carries data-video-id attributes; older markup is covered by regex
# fallbacks below.
# NOTE(review): the accumulator initialisation lines (ids_in_page,
# titles_in_page) are elided in this listing (internal lines 2621-2623).
2624 for item in re.findall(
2625 r'(<[^>]*\bdata-video-id\s*=\s*["\'][0-9A-Za-z_-]{11}[^>]+>)', page):
2626 attrs = extract_attributes(item)
2627 video_id = attrs['data-video-id']
2628 video_title = unescapeHTML(attrs.get('data-title'))
2630 video_title = video_title.strip()
2631 ids_in_page.append(video_id)
2632 titles_in_page.append(video_title)
2634 # Fallback with old _VIDEO_RE
2635 self.extract_videos_from_page_impl(
2636 self._VIDEO_RE, page, ids_in_page, titles_in_page)
# Relaxed fallback patterns for anchors / data-video-ids attributes that the
# primary regexes miss.
2639 self.extract_videos_from_page_impl(
2640 r'href="\s*/watch\?v\s*=\s*(?P<id>[0-9A-Za-z_-]{11})', page,
2641 ids_in_page, titles_in_page)
2642 self.extract_videos_from_page_impl(
2643 r'data-video-ids\s*=\s*["\'](?P<id>[0-9A-Za-z_-]{11})', page,
2644 ids_in_page, titles_in_page)
# Ids and titles are appended in lockstep, so pair them positionally.
2646 return zip(ids_in_page, titles_in_page)
2648 def _extract_mix(self, playlist_id):
2649 # The mixes are generated from a single video
2650 # the id of the playlist is just 'RD' + video_id
# NOTE(review): the `ids` accumulator initialisation and the loop-exit /
# accumulation lines are elided in this listing (internal 2651, 2660,
# 2664-2668, 2670, 2672).
2652 last_id = playlist_id[-11:]
2653 for n in itertools.count(1):
2654 url = 'https://youtube.com/watch?v=%s&list=%s' % (last_id, playlist_id)
2655 webpage = self._download_webpage(
2656 url, playlist_id, 'Downloading page {0} of Youtube mix'.format(n))
2657 new_ids = orderedSet(re.findall(
2658 r'''(?xs)data-video-username=".*?".*?
2659 href="/watch\?v=([0-9A-Za-z_-]{11})&[^"]*?list=%s''' % re.escape(playlist_id),
2661 # Fetch new pages until all the videos are repeated, it seems that
2662 # there are always 51 unique videos.
2663 new_ids = [_id for _id in new_ids if _id not in ids]
# Turn the collected ids into url_result entries for the playlist.
2669 url_results = self._ids_to_results(ids)
# The mix title lives in differently-classed spans depending on page version.
2671 search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage)
2673 search_title('playlist-title')
2674 or search_title('title long-title')
2675 or search_title('title'))
2676 title = clean_html(title_span)
2678 return self.playlist_result(url_results, playlist_id, title)
2680 def _extract_playlist(self, playlist_id):
# Download the playlist page, surface YouTube alert messages (private /
# nonexistent playlists raise), then build a playlist result.  Returns a
# (has_videos, playlist) tuple.
# NOTE(review): several guard/else lines are elided in this listing.
2681 url = self._TEMPLATE_URL % playlist_id
2682 page = self._download_webpage(url, playlist_id)
2684 # the yt-alert-message now has tabindex attribute (see https://github.com/ytdl-org/youtube-dl/issues/11604)
2685 for match in re.findall(r'<div class="yt-alert-message"[^>]*>([^<]+)</div>', page):
2686 match = match.strip()
2687 # Check if the playlist exists or is private
2688 mobj = re.match(r'[^<]*(?:The|This) playlist (?P<reason>does not exist|is private)[^<]*', match)
2690 reason = mobj.group('reason')
2691 message = 'This playlist %s' % reason
2692 if 'private' in reason:
2693 message += ', use --username or --netrc to access it'
2695 raise ExtractorError(message, expected=True)
2696 elif re.match(r'[^<]*Invalid parameters[^<]*', match):
2697 raise ExtractorError(
2698 'Invalid parameters. Maybe URL is incorrect.',
2700 elif re.match(r'[^<]*Choose your language[^<]*', match):
# Any other alert is only warned about, not fatal.
2703 self.report_warning('Youtube gives an alert message: ' + match)
2705 playlist_title = self._html_search_regex(
2706 r'(?s)<h1 class="pl-header-title[^"]*"[^>]*>\s*(.*?)\s*</h1>',
2707 page, 'title', default=None)
# Uploader name/id/url are scraped from the pl-header-details block.
2709 _UPLOADER_BASE = r'class=["\']pl-header-details[^>]+>\s*<li>\s*<a[^>]+\bhref='
2710 uploader = self._search_regex(
2711 r'%s["\']/(?:user|channel)/[^>]+>([^<]+)' % _UPLOADER_BASE,
2712 page, 'uploader', default=None)
2714 r'%s(["\'])(?P<path>/(?:user|channel)/(?P<uploader_id>.+?))\1' % _UPLOADER_BASE,
2717 uploader_id = mobj.group('uploader_id')
2718 uploader_url = compat_urlparse.urljoin(url, mobj.group('path'))
2720 uploader_id = uploader_url = None
2724 if not playlist_title:
2726 # Some playlist URLs don't actually serve a playlist (e.g.
2727 # https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4)
2728 next(self._entries(page, playlist_id))
2729 except StopIteration:
2732 playlist = self.playlist_result(
2733 self._entries(page, playlist_id), playlist_id, playlist_title)
2735 'uploader': uploader,
2736 'uploader_id': uploader_id,
2737 'uploader_url': uploader_url,
2740 return has_videos, playlist
2742 def _check_download_just_video(self, url, playlist_id):
2743 # Check if it's a video-specific URL
# Returns (video_id, result): result is a url_result for the bare video when
# --no-playlist is set, otherwise None so playlist extraction proceeds.
# NOTE(review): the `if video_id:` guard line is elided in this listing.
2744 query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
2745 video_id = query_dict.get('v', [None])[0] or self._search_regex(
2746 r'(?:(?:^|//)youtu\.be/|youtube\.com/embed/(?!videoseries))([0-9A-Za-z_-]{11})', url,
2747 'video id', default=None)
2749 if self._downloader.params.get('noplaylist'):
2750 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
2751 return video_id, self.url_result(video_id, 'Youtube', video_id=video_id)
2753 self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
2754 return video_id, None
2757 def _real_extract(self, url):
2758 # Extract playlist id
2759 mobj = re.match(self._VALID_URL, url)
2761 raise ExtractorError('Invalid URL: %s' % url)
2762 playlist_id = mobj.group(1) or mobj.group(2)
# May short-circuit to a single-video result when --no-playlist is given.
2764 video_id, video = self._check_download_just_video(url, playlist_id)
2768 if playlist_id.startswith(('RD', 'UL', 'PU')):
2769 # Mixes require a custom extraction process
2770 return self._extract_mix(playlist_id)
2772 has_videos, playlist = self._extract_playlist(playlist_id)
2773 if has_videos or not video_id:
2776 # Some playlist URLs don't actually serve a playlist (see
2777 # https://github.com/ytdl-org/youtube-dl/issues/10537).
2778 # Fallback to plain video extraction if there is a video id
2779 # along with playlist id.
2780 return self.url_result(video_id, 'Youtube', video_id=video_id)
# Extractor for /channel/<id> pages; delegates to playlist extraction when
# the channel's uploads playlist (UU...) can be resolved.
2783 class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
2784 IE_DESC = 'YouTube.com channels'
2785 _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com|(?:www\.)?invidio\.us)/channel/(?P<id>[0-9A-Za-z_-]+)'
# %s is filled with the channel id to get the channel's videos listing.
2786 _TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos'
2787 _VIDEO_RE = r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?'
2788 IE_NAME = 'youtube:channel'
2790 'note': 'paginated channel',
2791 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
2792 'playlist_mincount': 91,
2794 'id': 'UUKfVa3S1e4PHvxWcwyMMg8w',
2795 'title': 'Uploads from lex will',
2796 'uploader': 'lex will',
2797 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2800 'note': 'Age restricted channel',
2801 # from https://www.youtube.com/user/DeusExOfficial
2802 'url': 'https://www.youtube.com/channel/UCs0ifCMCm1icqRbqhUINa0w',
2803 'playlist_mincount': 64,
2805 'id': 'UUs0ifCMCm1icqRbqhUINa0w',
2806 'title': 'Uploads from Deus Ex',
2807 'uploader': 'Deus Ex',
2808 'uploader_id': 'DeusExOfficial',
2811 'url': 'https://invidio.us/channel/UC23qupoDRn9YOAVzeoxjOQA',
2812 'only_matching': True,
def suitable(cls, url):
    """Decline URLs that the playlists or live extractors claim instead."""
    # Guard-clause form: the more specific extractors win the URL first.
    if YoutubePlaylistsIE.suitable(url) or YoutubeLiveIE.suitable(url):
        return False
    return super(YoutubeChannelIE, cls).suitable(url)
def _build_template_url(self, url, channel_id):
    """Build the channel videos listing URL; *url* is unused here (kept for
    signature parity with YoutubeUserIE's override)."""
    template = self._TEMPLATE_URL
    return template % channel_id
2823 def _real_extract(self, url):
# Resolve the channel's uploads playlist when possible and hand off to the
# playlist extractor; otherwise fall back to paging the channel itself.
# NOTE(review): several guard/else lines are elided in this listing.
2824 channel_id = self._match_id(url)
2826 url = self._build_template_url(url, channel_id)
2828 # Channel by page listing is restricted to 35 pages of 30 items, i.e. 1050 videos total (see #5778)
2829 # Workaround by extracting as a playlist if managed to obtain channel playlist URL
2830 # otherwise fallback on channel by page extraction
2831 channel_page = self._download_webpage(
2832 url + '?view=57', channel_id,
2833 'Downloading channel page', fatal=False)
2834 if channel_page is False:
2835 channel_playlist_id = False
2837 channel_playlist_id = self._html_search_meta(
2838 'channelId', channel_page, 'channel id', default=None)
2839 if not channel_playlist_id:
# Secondary source: app-link meta tags carry a vnd.youtube:// channel URL.
2840 channel_url = self._html_search_meta(
2841 ('al:ios:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad'),
2842 channel_page, 'channel url', default=None)
2844 channel_playlist_id = self._search_regex(
2845 r'vnd\.youtube://user/([0-9A-Za-z_-]+)',
2846 channel_url, 'channel id', default=None)
2847 if channel_playlist_id and channel_playlist_id.startswith('UC'):
# A channel id UCxxxx maps to its uploads playlist id UUxxxx.
2848 playlist_id = 'UU' + channel_playlist_id[2:]
2849 return self.url_result(
2850 compat_urlparse.urljoin(url, '/playlist?list=%s' % playlist_id), 'YoutubePlaylist')
2852 channel_page = self._download_webpage(url, channel_id, 'Downloading page #1')
2853 autogenerated = re.search(r'''(?x)
2855 channel-header-autogenerated-label|
2856 yt-channel-title-autogenerated
2857 )[^"]*"''', channel_page) is not None
2860 # The videos are contained in a single page
2861 # the ajax pages can't be used, they are empty
2864 video_id, 'Youtube', video_id=video_id,
2865 video_title=video_title)
2866 for video_id, video_title in self.extract_videos_from_page(channel_page)]
2867 return self.playlist_result(entries, channel_id)
# Probe for at least one entry so an empty channel can surface YouTube's
# alert message instead of a silent empty playlist.
2870 next(self._entries(channel_page, channel_id))
2871 except StopIteration:
2872 alert_message = self._html_search_regex(
2873 r'(?s)<div[^>]+class=(["\']).*?\byt-alert-message\b.*?\1[^>]*>(?P<alert>[^<]+)</div>',
2874 channel_page, 'alert', default=None, group='alert')
2876 raise ExtractorError('Youtube said: %s' % alert_message, expected=True)
2878 return self.playlist_result(self._entries(channel_page, channel_id), channel_id)
# Extractor for /user/<name> and /c/<name> pages (and bare ytuser: ids);
# reuses the channel extraction machinery with a different URL template.
2881 class YoutubeUserIE(YoutubeChannelIE):
2882 IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
2883 _VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:(?P<user>user|c)/)?(?!(?:attribution_link|watch|results|shared)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
# Two slots: the path kind ('user' or 'c') and the user/channel name.
2884 _TEMPLATE_URL = 'https://www.youtube.com/%s/%s/videos'
2885 IE_NAME = 'youtube:user'
2888 'url': 'https://www.youtube.com/user/TheLinuxFoundation',
2889 'playlist_mincount': 320,
2891 'id': 'UUfX55Sx5hEFjoC3cNs6mCUQ',
2892 'title': 'Uploads from The Linux Foundation',
2893 'uploader': 'The Linux Foundation',
2894 'uploader_id': 'TheLinuxFoundation',
2897 # Only available via https://www.youtube.com/c/12minuteathlete/videos
2898 # but not https://www.youtube.com/user/12minuteathlete/videos
2899 'url': 'https://www.youtube.com/c/12minuteathlete/videos',
2900 'playlist_mincount': 249,
2902 'id': 'UUVjM-zV6_opMDx7WYxnjZiQ',
2903 'title': 'Uploads from 12 Minute Athlete',
2904 'uploader': '12 Minute Athlete',
2905 'uploader_id': 'the12minuteathlete',
2908 'url': 'ytuser:phihag',
2909 'only_matching': True,
2911 'url': 'https://www.youtube.com/c/gametrailers',
2912 'only_matching': True,
2914 'url': 'https://www.youtube.com/gametrailers',
2915 'only_matching': True,
2917 # This channel is not available, geo restricted to JP
2918 'url': 'https://www.youtube.com/user/kananishinoSMEJ/videos',
2919 'only_matching': True,
2923 def suitable(cls, url):
2924 # Don't return True if the url can be extracted with other youtube
2925 # extractor, the regex would is too permissive and it would match.
2926 other_yt_ies = iter(klass for (name, klass) in globals().items() if name.startswith('Youtube') and name.endswith('IE') and klass is not cls)
2927 if any(ie.suitable(url) for ie in other_yt_ies):
2930 return super(YoutubeUserIE, cls).suitable(url)
def _build_template_url(self, url, channel_id):
    """Build the https://www.youtube.com/<user|c>/<id>/videos URL for *url*.

    The path kind comes from the 'user' group of _VALID_URL and defaults to
    'user' for bare-name and 'ytuser:' style inputs. *channel_id* is accepted
    for interface compatibility with the parent class but is not used here.
    """
    match = re.match(self._VALID_URL, url)
    path_kind = match.group('user') or 'user'
    return self._TEMPLATE_URL % (path_kind, match.group('id'))
# Extractor for /live pages of a user, channel or custom URL. Resolves the
# page either to the currently live video or back to the base channel URL.
2937 class YoutubeLiveIE(YoutubeBaseInfoExtractor):
2938 IE_DESC = 'YouTube.com live streams'
2939 _VALID_URL = r'(?P<base_url>https?://(?:\w+\.)?youtube\.com/(?:(?:user|channel|c)/)?(?P<id>[^/]+))/live'
2940 IE_NAME = 'youtube:live'
# NOTE(review): the _TESTS list scaffolding is elided from this listing.
2943 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
2945 'id': 'a48o2S1cPoo',
2947 'title': 'The Young Turks - Live Main Show',
2948 'uploader': 'The Young Turks',
2949 'uploader_id': 'TheYoungTurks',
2950 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
2951 'upload_date': '20150715',
2952 'license': 'Standard YouTube License',
2953 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
2954 'categories': ['News & Politics'],
2955 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
2957 'dislike_count': int,
2960 'skip_download': True,
2963 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
2964 'only_matching': True,
2966 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
2967 'only_matching': True,
2969 'url': 'https://www.youtube.com/TheYoungTurks/live',
2970 'only_matching': True,
2973 def _real_extract(self, url):
2974 mobj = re.match(self._VALID_URL, url)
2975 channel_id = mobj.group('id')
2976 base_url = mobj.group('base_url')
# Download is non-fatal: on failure we still fall through to the base URL.
2977 webpage = self._download_webpage(url, channel_id, fatal=False)
# NOTE(review): a line is elided here — presumably an 'if webpage:' guard
# around the parsing below; confirm against the full source.
2979 page_type = self._og_search_property(
2980 'type', webpage, 'page type', default='')
2981 video_id = self._html_search_meta(
2982 'videoId', webpage, 'video id', default=None)
# Only hand off to the video extractor when the page is a video page and
# carries a well-formed 11-character video id.
2983 if page_type.startswith('video') and video_id and re.match(
2984 r'^[0-9A-Za-z_-]{11}$', video_id):
2985 return self.url_result(video_id, YoutubeIE.ie_key())
# No live video found: re-dispatch the bare channel/user URL.
2986 return self.url_result(base_url)
# Extractor for a user's or channel's /playlists tab. Defines no methods of
# its own; the paging/extraction logic is inherited from
# YoutubePlaylistsBaseInfoExtractor.
2989 class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
2990 IE_DESC = 'YouTube.com user/channel playlists'
2991 _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/(?:user|channel)/(?P<id>[^/]+)/playlists'
2992 IE_NAME = 'youtube:playlists'
# NOTE(review): the _TESTS list scaffolding is elided from this listing.
2995 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
2996 'playlist_mincount': 4,
2998 'id': 'ThirstForScience',
2999 'title': 'ThirstForScience',
3002 # with "Load more" button
3003 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
3004 'playlist_mincount': 70,
3007 'title': 'Игорь Клейнер',
3010 'url': 'https://www.youtube.com/channel/UCiU1dHvZObB2iP6xkJ__Icw/playlists',
3011 'playlist_mincount': 17,
3013 'id': 'UCiU1dHvZObB2iP6xkJ__Icw',
3014 'title': 'Chem Player',
# Shared base for the search extractors: only overrides the per-entry regex.
3020 class YoutubeSearchBaseInfoExtractor(YoutubePlaylistBaseInfoExtractor):
# Matches a result link: captures the 11-character video id and, when the
# anchor carries one, its title attribute.
3021 _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})(?:[^"]*"[^>]+\btitle="(?P<title>[^"]+))?'
# 'ytsearchN:<query>' extractor: scrapes the HTML results pages via the SPF
# JSON endpoint and follows 'Next' links until enough videos are collected.
3024 class YoutubeSearchIE(SearchInfoExtractor, YoutubeSearchBaseInfoExtractor):
3025 IE_DESC = 'YouTube.com searches'
3026 # there doesn't appear to be a real limit, for example if you search for
3027 # 'python' you get more than 8.000.000 results
3028 _MAX_RESULTS = float('inf')
3029 IE_NAME = 'youtube:search'
3030 _SEARCH_KEY = 'ytsearch'
# Extra query parameters merged into the results URL; subclasses override
# this (see YoutubeSearchDateIE).
3031 _EXTRA_QUERY_ARGS = {}
3034 def _get_n_results(self, query, n):
3035 """Get a specified number of results for a query"""
# NOTE(review): initialisation lines are elided here (presumably the
# 'videos'/'limit' accumulators and the url_query dict literal opening);
# confirm against the full source.
3041 'search_query': query.encode('utf-8'),
3043 url_query.update(self._EXTRA_QUERY_ARGS)
3044 result_url = 'https://www.youtube.com/results?' + compat_urllib_parse_urlencode(url_query)
3046 for pagenum in itertools.count(1):
# spf=navigate makes YouTube return a JSON envelope whose second element
# holds the page body HTML (see data[1]['body']['content'] below).
3047 data = self._download_json(
3048 result_url, video_id='query "%s"' % query,
3049 note='Downloading page %s' % pagenum,
3050 errnote='Unable to download API page',
3051 query={'spf': 'navigate'})
3052 html_content = data[1]['body']['content']
# A 'search-message' block means YouTube reported zero results.
3054 if 'class="search-message' in html_content:
3055 raise ExtractorError(
3056 '[youtube] No video results', expected=True)
3058 new_videos = list(self._process_page(html_content))
3059 videos += new_videos
# Stop when a page yields nothing new or we have enough results.
# NOTE(review): the loop-exit statements after the conditions are elided.
3060 if not new_videos or len(videos) > limit:
3062 next_link = self._html_search_regex(
3063 r'href="(/results\?[^"]*\bsp=[^"]+)"[^>]*>\s*<span[^>]+class="[^"]*\byt-uix-button-content\b[^"]*"[^>]*>Next',
3064 html_content, 'next link', default=None)
3065 if next_link is None:
3067 result_url = compat_urlparse.urljoin('https://www.youtube.com/', next_link)
3071 return self.playlist_result(videos, query)
# 'ytsearchdateN:<query>' variant: identical to YoutubeSearchIE except the
# results are requested sorted by upload date via the search_sort parameter.
3074 class YoutubeSearchDateIE(YoutubeSearchIE):
3075 IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
3076 _SEARCH_KEY = 'ytsearchdate'
3077 IE_DESC = 'YouTube.com searches, newest videos first'
3078 _EXTRA_QUERY_ARGS = {'search_sort': 'video_date_uploaded'}
# Extractor for already-built /results?search_query=... URLs (as opposed to
# the ytsearch keyword). Scrapes a single results page.
3081 class YoutubeSearchURLIE(YoutubeSearchBaseInfoExtractor):
3082 IE_DESC = 'YouTube.com search URLs'
3083 IE_NAME = 'youtube:search_url'
3084 _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)'
# NOTE(review): the _TESTS list scaffolding is elided from this listing.
3086 'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
3087 'playlist_mincount': 5,
3089 'title': 'youtube-dl test video',
3092 'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
3093 'only_matching': True,
def _real_extract(self, url):
    """Scrape one search-results page and return it as a playlist.

    The playlist title is the decoded search query taken from the URL.
    """
    raw_query = re.match(self._VALID_URL, url).group('query')
    search_query = compat_urllib_parse_unquote_plus(raw_query)
    results_page = self._download_webpage(url, search_query)
    return self.playlist_result(
        self._process_page(results_page), playlist_title=search_query)
# Extractor for /show/<id> pages; delegates to the base playlists extractor
# via the show's /playlists sub-page (see _real_extract below).
3103 class YoutubeShowIE(YoutubePlaylistsBaseInfoExtractor):
3104 IE_DESC = 'YouTube.com (multi-season) shows'
3105 _VALID_URL = r'https?://(?:www\.)?youtube\.com/show/(?P<id>[^?#]*)'
3106 IE_NAME = 'youtube:show'
# NOTE(review): the _TESTS list scaffolding is elided from this listing.
3108 'url': 'https://www.youtube.com/show/airdisasters',
3109 'playlist_mincount': 5,
3111 'id': 'airdisasters',
3112 'title': 'Air Disasters',
def _real_extract(self, url):
    """Re-dispatch a show URL to the parent playlists extractor.

    The show id from the URL is rewritten into the show's /playlists page,
    which the base class knows how to paginate.
    """
    show_id = self._match_id(url)
    playlists_url = 'https://www.youtube.com/show/%s/playlists' % show_id
    return super(YoutubeShowIE, self)._real_extract(playlists_url)
3122 class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
# NOTE(review): the docstring quotes are elided from this listing.
3124 Base class for feed extractors
3125 Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
# Feeds are personal pages, so login credentials are mandatory.
3127 _LOGIN_REQUIRED = True
# NOTE(review): the '@property'/'def IE_NAME' lines are elided here; this
# return presumably belongs to an IE_NAME property — confirm.
3131 return 'youtube:%s' % self._FEED_NAME
3133 def _real_initialize(self):
3136 def _entries(self, page):
3137 # The extraction process is the same as for playlists, but the regex
3138 # for the video ids doesn't contain an index
3140 more_widget_html = content_html = page
3141 for page_num in itertools.count(1):
3142 matches = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)
3144 # 'recommended' feed has infinite 'load more' and each new portion spins
3145 # the same videos in (sometimes) slightly different order, so we'll check
3146 # for unicity and break when portion has no new videos
3147 new_ids = list(filter(lambda video_id: video_id not in ids, orderedSet(matches)))
# NOTE(review): the break-on-no-new-ids logic and the yield statement for
# each entry are elided between these lines; confirm against full source.
3153 for entry in self._ids_to_results(new_ids):
# Follow the AJAX "load more" widget to fetch the next portion of the feed.
3156 mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
3160 more = self._download_json(
3161 'https://youtube.com/%s' % mobj.group('more'), self._PLAYLIST_TITLE,
3162 'Downloading page #%s' % page_num,
3163 transform_source=uppercase_escape)
3164 content_html = more['content_html']
3165 more_widget_html = more['load_more_widget_html']
def _real_extract(self, url):
    """Download this feed's page and return its videos as a playlist.

    The feed URL is derived from the subclass's _FEED_NAME; the playlist is
    titled with the subclass's _PLAYLIST_TITLE.
    """
    feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
    feed_page = self._download_webpage(feed_url, self._PLAYLIST_TITLE)
    return self.playlist_result(
        self._entries(feed_page), playlist_title=self._PLAYLIST_TITLE)
# Extractor for the authenticated user's Watch Later list (playlist id 'WL').
3175 class YoutubeWatchLaterIE(YoutubePlaylistIE):
3176 IE_NAME = 'youtube:watchlater'
3177 IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
3178 _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:feed/watch_later|(?:playlist|watch)\?(?:.+&)?list=WL)|:ytwatchlater'
# NOTE(review): the _TESTS list scaffolding is elided from this listing.
3181 'url': 'https://www.youtube.com/playlist?list=WL',
3182 'only_matching': True,
3184 'url': 'https://www.youtube.com/watch?v=bCNU9TrbiRk&index=1&list=WL',
3185 'only_matching': True,
3188 def _real_extract(self, url):
# First see whether the URL points at a single video rather than the list.
# NOTE(review): the return statements after each call are elided here —
# presumably 'if video: return video' and 'return playlist'; confirm.
3189 _, video = self._check_download_just_video(url, 'WL')
3192 _, playlist = self._extract_playlist('WL')
# Extractor for the authenticated user's favourites page (':ytfav').
3196 class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
3197 IE_NAME = 'youtube:favorites'
3198 IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
3199 _VALID_URL = r'https?://(?:www\.)?youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
# Favourites are per-account, so credentials are required.
3200 _LOGIN_REQUIRED = True
def _real_extract(self, url):
    """Resolve the logged-in user's favourites page to its backing playlist.

    Downloads the my_favorites page, pulls the playlist id out of a
    'list=...' link, and hands it to the playlist extractor.
    """
    favourites_page = self._download_webpage(
        'https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
    favourites_list_id = self._search_regex(
        r'list=(.+?)["&]', favourites_page, 'favourites playlist id')
    return self.url_result(favourites_list_id, 'YoutubePlaylist')
# Feed extractor for /feed/recommended (':ytrec'); all logic is inherited
# from YoutubeFeedsInfoExtractor, parameterised by the two attributes below.
3208 class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
3209 IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
3210 _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/recommended|:ytrec(?:ommended)?'
3211 _FEED_NAME = 'recommended'
3212 _PLAYLIST_TITLE = 'Youtube Recommended videos'
# Feed extractor for /feed/subscriptions (':ytsubs'); logic inherited from
# YoutubeFeedsInfoExtractor.
3215 class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
3216 IE_DESC = 'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
3217 _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
3218 _FEED_NAME = 'subscriptions'
3219 _PLAYLIST_TITLE = 'Youtube Subscriptions'
# Feed extractor for /feed/history (':ythistory'); logic inherited from
# YoutubeFeedsInfoExtractor.
3222 class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
3223 IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
3224 _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/history|:ythistory'
3225 _FEED_NAME = 'history'
3226 _PLAYLIST_TITLE = 'Youtube History'
# Catch-all for watch URLs whose 'v=' parameter was lost (typically because
# the shell ate the '&'). Never extracts anything — it exists only to raise
# a helpful error instead of a confusing failure.
3229 class YoutubeTruncatedURLIE(InfoExtractor):
3230 IE_NAME = 'youtube:truncated_url'
3231 IE_DESC = False # Do not list
# NOTE(review): several alternation branches of this verbose regex are
# elided from this listing.
3232 _VALID_URL = r'''(?x)
3234 (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
3237 annotation_id=annotation_[^&]+|
3243 attribution_link\?a=[^&]+
3249 'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
3250 'only_matching': True,
3252 'url': 'https://www.youtube.com/watch?',
3253 'only_matching': True,
3255 'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
3256 'only_matching': True,
3258 'url': 'https://www.youtube.com/watch?feature=foo',
3259 'only_matching': True,
3261 'url': 'https://www.youtube.com/watch?hl=en-GB',
3262 'only_matching': True,
3264 'url': 'https://www.youtube.com/watch?t=2372',
3265 'only_matching': True,
# Always raises: tells the user to quote the URL so the shell keeps '&v=...'.
# NOTE(review): the tail of the message and the 'expected=True)' closing of
# this ExtractorError are elided from this listing.
3268 def _real_extract(self, url):
3269 raise ExtractorError(
3270 'Did you forget to quote the URL? Remember that & is a meta '
3271 'character in most shells, so you want to put the URL in quotes, '
3273 '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
3274 ' or simply youtube-dl BaW_jenozKc .',
# Catch-all for watch URLs whose video id is shorter than the required 11
# characters (a truncated copy/paste). Always raises a descriptive error.
3278 class YoutubeTruncatedIDIE(InfoExtractor):
3279 IE_NAME = 'youtube:truncated_id'
3280 IE_DESC = False # Do not list
# 1-10 id characters followed by end-of-string: a full 11-char id would be
# claimed by the main YoutubeIE instead.
3281 _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'
# NOTE(review): the _TESTS list scaffolding is elided from this listing.
3284 'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
3285 'only_matching': True,
# NOTE(review): the 'expected=True)' closing of this raise is elided.
3288 def _real_extract(self, url):
3289 video_id = self._match_id(url)
3290 raise ExtractorError(
3291 'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url),