3 from __future__ import unicode_literals
14 from .common import InfoExtractor, SearchInfoExtractor
15 from ..jsinterp import JSInterpreter
16 from ..swfinterp import SWFInterpreter
17 from ..compat import (
22 compat_urllib_parse_unquote,
23 compat_urllib_parse_unquote_plus,
24 compat_urllib_parse_urlencode,
25 compat_urllib_parse_urlparse,
37 get_element_by_attribute,
60 class YoutubeBaseInfoExtractor(InfoExtractor):
61 """Provide base functions for Youtube extractors"""
62 _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
63 _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'
65 _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
66 _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
67 _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'
69 _NETRC_MACHINE = 'youtube'
70 # If True it will raise an error if no login info is provided
71 _LOGIN_REQUIRED = False
73 _PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL|OLAK5uy_)[0-9A-Za-z-_]{10,}'
75 def _set_language(self):
77 '.youtube.com', 'PREF', 'f1=50000000&hl=en',
78 # YouTube sets the expire time to about two months
79 expire_time=time.time() + 2 * 30 * 24 * 3600)
81 def _ids_to_results(self, ids):
83 self.url_result(vid_id, 'Youtube', video_id=vid_id)
88 Attempt to log in to YouTube.
89 True is returned if successful or skipped.
90 False is returned if login failed.
92 If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
94 username, password = self._get_login_info()
95 # No authentication to be performed
97 if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
98 raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
101 login_page = self._download_webpage(
102 self._LOGIN_URL, None,
103 note='Downloading login page',
104 errnote='unable to fetch login page', fatal=False)
105 if login_page is False:
108 login_form = self._hidden_inputs(login_page)
110 def req(url, f_req, note, errnote):
111 data = login_form.copy()
114 'checkConnection': 'youtube',
115 'checkedDomains': 'youtube',
117 'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
118 'f.req': json.dumps(f_req),
119 'flowName': 'GlifWebSignIn',
120 'flowEntry': 'ServiceLogin',
121 # TODO: reverse actual botguard identifier generation algo
122 'bgRequest': '["identifier",""]',
124 return self._download_json(
125 url, None, note=note, errnote=errnote,
126 transform_source=lambda s: re.sub(r'^[^[]*', '', s),
128 data=urlencode_postdata(data), headers={
129 'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
130 'Google-Accounts-XSRF': 1,
134 self._downloader.report_warning(message)
138 None, [], None, 'US', None, None, 2, False, True,
142 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
144 1, [None, None, []], None, None, None, True
149 lookup_results = req(
150 self._LOOKUP_URL, lookup_req,
151 'Looking up account info', 'Unable to look up account info')
153 if lookup_results is False:
156 user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
158 warn('Unable to extract user hash')
163 None, 1, None, [1, None, None, None, [password, None, True]],
165 None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
166 1, [None, None, []], None, None, None, True
169 challenge_results = req(
170 self._CHALLENGE_URL, challenge_req,
171 'Logging in', 'Unable to log in')
173 if challenge_results is False:
176 login_res = try_get(challenge_results, lambda x: x[0][5], list)
178 login_msg = try_get(login_res, lambda x: x[5], compat_str)
180 'Unable to login: %s' % 'Invalid password'
181 if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
184 res = try_get(challenge_results, lambda x: x[0][-1], list)
186 warn('Unable to extract result entry')
189 login_challenge = try_get(res, lambda x: x[0][0], list)
191 challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
192 if challenge_str == 'TWO_STEP_VERIFICATION':
193 # SEND_SUCCESS - TFA code has been successfully sent to phone
194 # QUOTA_EXCEEDED - reached the limit of TFA codes
195 status = try_get(login_challenge, lambda x: x[5], compat_str)
196 if status == 'QUOTA_EXCEEDED':
197 warn('Exceeded the limit of TFA codes, try later')
200 tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
202 warn('Unable to extract TL')
205 tfa_code = self._get_tfa_info('2-step verification code')
209 'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
210 '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
213 tfa_code = remove_start(tfa_code, 'G-')
216 user_hash, None, 2, None,
218 9, None, None, None, None, None, None, None,
219 [None, tfa_code, True, 2]
223 self._TFA_URL.format(tl), tfa_req,
224 'Submitting TFA code', 'Unable to submit TFA code')
226 if tfa_results is False:
229 tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
231 tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
233 'Unable to finish TFA: %s' % 'Invalid TFA code'
234 if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
237 check_cookie_url = try_get(
238 tfa_results, lambda x: x[0][-1][2], compat_str)
241 'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
242 'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
243 'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
245 challenge = CHALLENGES.get(
247 '%s returned error %s.' % (self.IE_NAME, challenge_str))
248 warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
251 check_cookie_url = try_get(res, lambda x: x[2], compat_str)
253 if not check_cookie_url:
254 warn('Unable to extract CheckCookie URL')
257 check_cookie_results = self._download_webpage(
258 check_cookie_url, None, 'Checking cookie', fatal=False)
260 if check_cookie_results is False:
263 if 'https://myaccount.google.com/' not in check_cookie_results:
264 warn('Unable to log in')
269 def _download_webpage_handle(self, *args, **kwargs):
270 query = kwargs.get('query', {}).copy()
271 query['disable_polymer'] = 'true'
272 kwargs['query'] = query
273 return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
274 *args, **compat_kwargs(kwargs))
276 def _real_initialize(self):
277 if self._downloader is None:
280 if not self._login():
284 class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
285 # Extract entries from page with "Load more" button
286 def _entries(self, page, playlist_id):
287 more_widget_html = content_html = page
288 for page_num in itertools.count(1):
289 for entry in self._process_page(content_html):
292 mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
298 while count <= retries:
300 # Downloading page may result in intermittent 5xx HTTP error
301 # that is usually worked around with a retry
302 more = self._download_json(
303 'https://youtube.com/%s' % mobj.group('more'), playlist_id,
304 'Downloading page #%s%s'
305 % (page_num, ' (retry #%d)' % count if count else ''),
306 transform_source=uppercase_escape)
308 except ExtractorError as e:
309 if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503):
315 content_html = more['content_html']
316 if not content_html.strip():
317 # Some webpages show a "Load more" button but they don't
320 more_widget_html = more['load_more_widget_html']
323 class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
324 def _process_page(self, content):
325 for video_id, video_title in self.extract_videos_from_page(content):
326 yield self.url_result(video_id, 'Youtube', video_id, video_title)
328 def extract_videos_from_page_impl(self, video_re, page, ids_in_page, titles_in_page):
329 for mobj in re.finditer(video_re, page):
330 # The link with index 0 is not the first video of the playlist (not sure if still actual)
331 if 'index' in mobj.groupdict() and mobj.group('id') == '0':
333 video_id = mobj.group('id')
334 video_title = unescapeHTML(
335 mobj.group('title')) if 'title' in mobj.groupdict() else None
337 video_title = video_title.strip()
338 if video_title == '► Play all':
341 idx = ids_in_page.index(video_id)
342 if video_title and not titles_in_page[idx]:
343 titles_in_page[idx] = video_title
345 ids_in_page.append(video_id)
346 titles_in_page.append(video_title)
348 def extract_videos_from_page(self, page):
351 self.extract_videos_from_page_impl(
352 self._VIDEO_RE, page, ids_in_page, titles_in_page)
353 return zip(ids_in_page, titles_in_page)
356 class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
357 def _process_page(self, content):
358 for playlist_id in orderedSet(re.findall(
359 r'<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*><a[^>]+href="/?playlist\?list=([0-9A-Za-z-_]{10,})"',
361 yield self.url_result(
362 'https://www.youtube.com/playlist?list=%s' % playlist_id, 'YoutubePlaylist')
364 def _real_extract(self, url):
365 playlist_id = self._match_id(url)
366 webpage = self._download_webpage(url, playlist_id)
367 title = self._og_search_title(webpage, fatal=False)
368 return self.playlist_result(self._entries(webpage, playlist_id), playlist_id, title)
371 class YoutubeIE(YoutubeBaseInfoExtractor):
372 IE_DESC = 'YouTube.com'
373 _VALID_URL = r"""(?x)^
375 (?:https?://|//) # http(s):// or protocol-independent URL
376 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|
377 (?:www\.)?deturl\.com/www\.youtube\.com/|
378 (?:www\.)?pwnyoutube\.com/|
379 (?:www\.)?hooktube\.com/|
380 (?:www\.)?yourepeat\.com/|
381 tube\.majestyc\.net/|
382 # Invidious instances taken from https://github.com/omarroth/invidious/wiki/Invidious-Instances
383 (?:(?:www|dev)\.)?invidio\.us/|
384 (?:(?:www|no)\.)?invidiou\.sh/|
385 (?:(?:www|fi|de)\.)?invidious\.snopyta\.org/|
386 (?:www\.)?invidious\.kabi\.tk/|
387 (?:www\.)?invidious\.enkirton\.net/|
388 (?:www\.)?invidious\.13ad\.de/|
389 (?:www\.)?invidious\.mastodon\.host/|
390 (?:www\.)?tube\.poal\.co/|
391 (?:www\.)?vid\.wxzm\.sx/|
392 youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
393 (?:.*?\#/)? # handle anchor (#/) redirect urls
394 (?: # the various things that can precede the ID:
395 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
396 |(?: # or the v= param in all its forms
397 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
398 (?:\?|\#!?) # the params delimiter ? or # or #!
399 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&v=V36LpHqtcDY)
404 youtu\.be| # just youtu.be/xxxx
405 vid\.plus| # or vid.plus/xxxx
406 zwearz\.com/watch| # or zwearz.com/watch/xxxx
408 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
410 )? # all until now is optional -> you can pass the naked ID
411 ([0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
414 %(playlist_id)s| # combined list/video URLs are handled by the playlist IE
415 WL # WL are handled by the watch later IE
418 (?(1).+)? # if we found the ID, everything can follow
419 $""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
420 _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
422 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
423 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
424 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
425 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
426 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
427 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
428 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
429 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
430 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
431 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
432 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
433 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
434 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
435 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
436 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
437 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
438 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
439 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
443 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
444 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
445 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
446 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
447 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
448 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
449 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
451 # Apple HTTP Live Streaming
452 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
453 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
454 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
455 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
456 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
457 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
458 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
459 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
462 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
463 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
464 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
465 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
466 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
467 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
468 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
469 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
470 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
471 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
472 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
473 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
476 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
477 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
478 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
479 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
480 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
481 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
482 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
485 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
486 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
487 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
488 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
489 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
490 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
491 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
492 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
493 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
494 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
495 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
496 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
497 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
498 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
499 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
500 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
501 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
502 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
503 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
504 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
505 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
506 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
509 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
510 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
512 # Dash webm audio with opus inside
513 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
514 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
515 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
518 '_rtmp': {'protocol': 'rtmp'},
520 # av01 video only formats sometimes served with "unknown" codecs
521 '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
522 '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
523 '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
524 '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
526 _SUBTITLE_FORMATS = ('srv1', 'srv2', 'srv3', 'ttml', 'vtt')
533 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
537 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
538 'uploader': 'Philipp Hagemeister',
539 'uploader_id': 'phihag',
540 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
541 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
542 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
543 'upload_date': '20121002',
544 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
545 'categories': ['Science & Technology'],
546 'tags': ['youtube-dl'],
550 'dislike_count': int,
556 'url': 'https://www.youtube.com/watch?v=UxxajLWwzqY',
557 'note': 'Test generic use_cipher_signature video (#897)',
561 'upload_date': '20120506',
562 'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
563 'alt_title': 'I Love It (feat. Charli XCX)',
564 'description': 'md5:f3ceb5ef83a08d95b9d146f973157cc8',
565 'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
566 'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
567 'iconic ep', 'iconic', 'love', 'it'],
569 'uploader': 'Icona Pop',
570 'uploader_id': 'IconaPop',
571 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IconaPop',
572 'creator': 'Icona Pop',
573 'track': 'I Love It (feat. Charli XCX)',
574 'artist': 'Icona Pop',
578 'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ',
579 'note': 'Test VEVO video with age protection (#956)',
583 'upload_date': '20130703',
584 'title': 'Justin Timberlake - Tunnel Vision (Official Music Video) (Explicit)',
585 'alt_title': 'Tunnel Vision',
586 'description': 'md5:07dab3356cde4199048e4c7cd93471e1',
588 'uploader': 'justintimberlakeVEVO',
589 'uploader_id': 'justintimberlakeVEVO',
590 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/justintimberlakeVEVO',
591 'creator': 'Justin Timberlake',
592 'track': 'Tunnel Vision',
593 'artist': 'Justin Timberlake',
598 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
599 'note': 'Embed-only video (#1746)',
603 'upload_date': '20120608',
604 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
605 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
606 'uploader': 'SET India',
607 'uploader_id': 'setindia',
608 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
613 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=UxxajLWwzqY',
614 'note': 'Use the first video ID in the URL',
618 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
619 'uploader': 'Philipp Hagemeister',
620 'uploader_id': 'phihag',
621 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
622 'upload_date': '20121002',
623 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
624 'categories': ['Science & Technology'],
625 'tags': ['youtube-dl'],
629 'dislike_count': int,
632 'skip_download': True,
636 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
637 'note': '256k DASH audio (format 141) via DASH manifest',
641 'upload_date': '20121002',
642 'uploader_id': '8KVIDEO',
643 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
645 'uploader': '8KVIDEO',
646 'title': 'UHDTV TEST 8K VIDEO.mp4'
649 'youtube_include_dash_manifest': True,
652 'skip': 'format 141 not served anymore',
654 # DASH manifest with encrypted signature
656 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
660 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
661 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
663 'uploader': 'AfrojackVEVO',
664 'uploader_id': 'AfrojackVEVO',
665 'upload_date': '20131011',
668 'youtube_include_dash_manifest': True,
669 'format': '141/bestaudio[ext=m4a]',
672 # JS player signature function name containing $
674 'url': 'https://www.youtube.com/watch?v=nfWlot6h_JM',
678 'title': 'Taylor Swift - Shake It Off',
679 'description': 'md5:bec2185232c05479482cb5a9b82719bf',
681 'uploader': 'TaylorSwiftVEVO',
682 'uploader_id': 'TaylorSwiftVEVO',
683 'upload_date': '20140818',
684 'creator': 'Taylor Swift',
687 'youtube_include_dash_manifest': True,
688 'format': '141/bestaudio[ext=m4a]',
693 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
698 'upload_date': '20100909',
699 'uploader': 'Amazing Atheist',
700 'uploader_id': 'TheAmazingAtheist',
701 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
702 'title': 'Burning Everyone\'s Koran',
703 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
706 # Normal age-gate video (No vevo, embed allowed)
708 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
712 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
713 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
715 'uploader': 'The Witcher',
716 'uploader_id': 'WitcherGame',
717 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
718 'upload_date': '20140605',
722 # Age-gate video with encrypted signature
724 'url': 'https://www.youtube.com/watch?v=6kLq3WMV1nU',
728 'title': 'Dedication To My Ex (Miss That) (Lyric Video)',
729 'description': 'md5:33765bb339e1b47e7e72b5490139bb41',
731 'uploader': 'LloydVEVO',
732 'uploader_id': 'LloydVEVO',
733 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/LloydVEVO',
734 'upload_date': '20110629',
738 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
739 # YouTube Red ad is not captured for creator
741 'url': '__2ABJjxzNo',
746 'upload_date': '20100430',
747 'uploader_id': 'deadmau5',
748 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
749 'creator': 'deadmau5',
750 'description': 'md5:12c56784b8032162bb936a5f76d55360',
751 'uploader': 'deadmau5',
752 'title': 'Deadmau5 - Some Chords (HD)',
753 'alt_title': 'Some Chords',
755 'expected_warnings': [
756 'DASH manifest missing',
759 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
761 'url': 'lqQg6PlCWgI',
766 'upload_date': '20150827',
767 'uploader_id': 'olympic',
768 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
769 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
770 'uploader': 'Olympic',
771 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
774 'skip_download': 'requires avconv',
779 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
783 'stretched_ratio': 16 / 9.,
785 'upload_date': '20110310',
786 'uploader_id': 'AllenMeow',
787 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
788 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
790 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
793 # url_encoded_fmt_stream_map is empty string
795 'url': 'qEJwOuvDf7I',
799 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
801 'upload_date': '20150404',
802 'uploader_id': 'spbelect',
803 'uploader': 'Наблюдатели Петербурга',
806 'skip_download': 'requires avconv',
808 'skip': 'This live event has ended.',
810 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
812 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
816 'title': 'md5:7b81415841e02ecd4313668cde88737a',
817 'description': 'md5:116377fd2963b81ec4ce64b542173306',
819 'upload_date': '20150625',
820 'uploader_id': 'dorappi2000',
821 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
822 'uploader': 'dorappi2000',
823 'formats': 'mincount:31',
825 'skip': 'not actual anymore',
827 # DASH manifest with segment_list
829 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
830 'md5': '8ce563a1d667b599d21064e982ab9e31',
834 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
835 'uploader': 'Airtek',
836 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
837 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
838 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
841 'youtube_include_dash_manifest': True,
842 'format': '135', # bestvideo
844 'skip': 'This live event has ended.',
847 # Multifeed videos (multiple cameras), URL is for Main Camera
848 'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
851 'title': 'teamPGP: Rocket League Noob Stream',
852 'description': 'md5:dc7872fb300e143831327f1bae3af010',
858 'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
859 'description': 'md5:dc7872fb300e143831327f1bae3af010',
861 'upload_date': '20150721',
862 'uploader': 'Beer Games Beer',
863 'uploader_id': 'beergamesbeer',
864 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
865 'license': 'Standard YouTube License',
871 'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
872 'description': 'md5:dc7872fb300e143831327f1bae3af010',
874 'upload_date': '20150721',
875 'uploader': 'Beer Games Beer',
876 'uploader_id': 'beergamesbeer',
877 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
878 'license': 'Standard YouTube License',
884 'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
885 'description': 'md5:dc7872fb300e143831327f1bae3af010',
887 'upload_date': '20150721',
888 'uploader': 'Beer Games Beer',
889 'uploader_id': 'beergamesbeer',
890 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
891 'license': 'Standard YouTube License',
897 'title': 'teamPGP: Rocket League Noob Stream (zim)',
898 'description': 'md5:dc7872fb300e143831327f1bae3af010',
900 'upload_date': '20150721',
901 'uploader': 'Beer Games Beer',
902 'uploader_id': 'beergamesbeer',
903 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
904 'license': 'Standard YouTube License',
908 'skip_download': True,
910 'skip': 'This video is not available.',
913 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
914 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
917 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
920 'skip': 'Not multifeed anymore',
923 'url': 'https://vid.plus/FlRa-iH7PGw',
924 'only_matching': True,
927 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
928 'only_matching': True,
931 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
932 # Also tests cut-off URL expansion in video description (see
933 # https://github.com/ytdl-org/youtube-dl/issues/1892,
934 # https://github.com/ytdl-org/youtube-dl/issues/8164)
935 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
939 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
940 'alt_title': 'Dark Walk - Position Music',
941 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
943 'upload_date': '20151119',
944 'uploader_id': 'IronSoulElf',
945 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
946 'uploader': 'IronSoulElf',
947 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
948 'track': 'Dark Walk - Position Music',
949 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
950 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
953 'skip_download': True,
957 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
958 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
959 'only_matching': True,
962 # Video with yt:stretch=17:0
963 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
967 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
968 'description': 'md5:ee18a25c350637c8faff806845bddee9',
969 'upload_date': '20151107',
970 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
971 'uploader': 'CH GAMER DROID',
974 'skip_download': True,
976 'skip': 'This video does not exist.',
979 # Video licensed under Creative Commons
980 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
984 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
985 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
987 'upload_date': '20150127',
988 'uploader_id': 'BerkmanCenter',
989 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
990 'uploader': 'The Berkman Klein Center for Internet & Society',
991 'license': 'Creative Commons Attribution license (reuse allowed)',
994 'skip_download': True,
998 # Channel-like uploader_url
999 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1001 'id': 'eQcmzGIKrzg',
1003 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
1004 'description': 'md5:dda0d780d5a6e120758d1711d062a867',
1006 'upload_date': '20151119',
1007 'uploader': 'Bernie Sanders',
1008 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1009 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
1010 'license': 'Creative Commons Attribution license (reuse allowed)',
1013 'skip_download': True,
1017 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;v=V36LpHqtcDY',
1018 'only_matching': True,
1021 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
1022 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1023 'only_matching': True,
1026 # Rental video preview
1027 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1029 'id': 'uGpuVWrhIzE',
1031 'title': 'Piku - Trailer',
1032 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1033 'upload_date': '20150811',
1034 'uploader': 'FlixMatrix',
1035 'uploader_id': 'FlixMatrixKaravan',
1036 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
1037 'license': 'Standard YouTube License',
1040 'skip_download': True,
1042 'skip': 'This video is not available.',
1045 # YouTube Red video with episode data
1046 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1048 'id': 'iqKdEhx-dD4',
1050 'title': 'Isolation - Mind Field (Ep 1)',
1051 'description': 'md5:46a29be4ceffa65b92d277b93f463c0f',
1053 'upload_date': '20170118',
1054 'uploader': 'Vsauce',
1055 'uploader_id': 'Vsauce',
1056 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
1057 'series': 'Mind Field',
1059 'episode_number': 1,
1062 'skip_download': True,
1064 'expected_warnings': [
1065 'Skipping DASH manifest',
1069 # The following content has been identified by the YouTube community
1070 # as inappropriate or offensive to some audiences.
1071 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1073 'id': '6SJNVb0GnPI',
1075 'title': 'Race Differences in Intelligence',
1076 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1078 'upload_date': '20140124',
1079 'uploader': 'New Century Foundation',
1080 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1081 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1084 'skip_download': True,
1089 'url': '1t24XAntNCY',
1090 'only_matching': True,
1093 # geo restricted to JP
1094 'url': 'sJL6WA-aGkQ',
1095 'only_matching': True,
1098 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
1099 'only_matching': True,
1102 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1103 'only_matching': True,
1107 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1108 'only_matching': True,
1111 # Video with unsupported adaptive stream type formats
1112 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1114 'id': 'Z4Vy8R84T1U',
1116 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1117 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1119 'upload_date': '20130923',
1120 'uploader': 'Amelia Putri Harwita',
1121 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1122 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1123 'formats': 'maxcount:10',
1126 'skip_download': True,
1127 'youtube_include_dash_manifest': False,
1131 # Youtube Music Auto-generated description
1132 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1134 'id': 'MgNrAu2pzNs',
1136 'title': 'Voyeur Girl',
1137 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1138 'upload_date': '20190312',
1139 'uploader': 'Various Artists - Topic',
1140 'uploader_id': 'UCVWKBi1ELZn0QX2CBLSkiyw',
1141 'artist': 'Stephen',
1142 'track': 'Voyeur Girl',
1143 'album': 'it\'s too much love to know my dear',
1144 'release_date': '20190313',
1145 'release_year': 2019,
1148 'skip_download': True,
1152 # Youtube Music Auto-generated description
1153 # Retrieve 'artist' field from 'Artist:' in video description
1154 # when it is present on youtube music video
1155 'url': 'https://www.youtube.com/watch?v=k0jLE7tTwjY',
1157 'id': 'k0jLE7tTwjY',
1159 'title': 'Latch Feat. Sam Smith',
1160 'description': 'md5:3cb1e8101a7c85fcba9b4fb41b951335',
1161 'upload_date': '20150110',
1162 'uploader': 'Various Artists - Topic',
1163 'uploader_id': 'UCNkEcmYdjrH4RqtNgh7BZ9w',
1164 'artist': 'Disclosure',
1165 'track': 'Latch Feat. Sam Smith',
1166 'album': 'Latch Featuring Sam Smith',
1167 'release_date': '20121008',
1168 'release_year': 2012,
1171 'skip_download': True,
1175 # Youtube Music Auto-generated description
1176 # handle multiple artists on youtube music video
1177 'url': 'https://www.youtube.com/watch?v=74qn0eJSjpA',
1179 'id': '74qn0eJSjpA',
1181 'title': 'Eastside',
1182 'description': 'md5:290516bb73dcbfab0dcc4efe6c3de5f2',
1183 'upload_date': '20180710',
1184 'uploader': 'Benny Blanco - Topic',
1185 'uploader_id': 'UCzqz_ksRu_WkIzmivMdIS7A',
1186 'artist': 'benny blanco, Halsey, Khalid',
1187 'track': 'Eastside',
1188 'album': 'Eastside',
1189 'release_date': '20180713',
1190 'release_year': 2018,
1193 'skip_download': True,
1197 # Youtube Music Auto-generated description
1198 # handle youtube music video with release_year and no release_date
1199 'url': 'https://www.youtube.com/watch?v=-hcAI0g-f5M',
1201 'id': '-hcAI0g-f5M',
1203 'title': 'Put It On Me',
1204 'description': 'md5:93c55acc682ae7b0c668f2e34e1c069e',
1205 'upload_date': '20180426',
1206 'uploader': 'Matt Maeson - Topic',
1207 'uploader_id': 'UCnEkIGqtGcQMLk73Kp-Q5LQ',
1208 'artist': 'Matt Maeson',
1209 'track': 'Put It On Me',
1210 'album': 'The Hearse',
1211 'release_date': None,
1212 'release_year': 2018,
1215 'skip_download': True,
    def __init__(self, *args, **kwargs):
        """Initialize the extractor and set up the per-run player cache."""
        super(YoutubeIE, self).__init__(*args, **kwargs)
        # Maps (player_url, signature layout id) -> deciphering callable so
        # each player binary is downloaded and parsed at most once per run
        # (see _decrypt_signature below).
        self._player_cache = {}
1224 def report_video_info_webpage_download(self, video_id):
1225 """Report attempt to download video info webpage."""
1226 self.to_screen('%s: Downloading video info webpage' % video_id)
1228 def report_information_extraction(self, video_id):
1229 """Report attempt to extract video information."""
1230 self.to_screen('%s: Extracting video information' % video_id)
1232 def report_unavailable_format(self, video_id, format):
1233 """Report extracted video URL."""
1234 self.to_screen('%s: Format %s not available' % (video_id, format))
1236 def report_rtmp_download(self):
1237 """Indicate the download will use the RTMP protocol."""
1238 self.to_screen('RTMP download detected')
1240 def _signature_cache_id(self, example_sig):
1241 """ Return a string representation of a signature """
1242 return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
    def _extract_signature_function(self, video_id, player_url, example_sig):
        # Build a signature-deciphering callable for the player referenced by
        # player_url, caching the computed character permutation on disk so
        # the player does not have to be re-downloaded/re-parsed every run.
            r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|(?:/[a-z]{2,3}_[A-Z]{2})?/base)?\.(?P<ext>[a-z]+)$',
            raise ExtractorError('Cannot identify player %r' % player_url)
        player_type = id_m.group('ext')  # player file extension: 'js' or 'swf'
        player_id = id_m.group('id')
        # Read from filesystem cache
        func_id = '%s_%s_%s' % (
            player_type, player_id, self._signature_cache_id(example_sig))
        # func_id doubles as a cache filename component, so it must not
        # contain path separators.
        assert os.path.basename(func_id) == func_id
        cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
        if cache_spec is not None:
            # Cached spec is a list of source indices: the deciphered
            # signature is the input characters re-ordered by those indices.
            return lambda s: ''.join(s[i] for i in cache_spec)
            'Downloading player %s' % player_url
            if self._downloader.params.get('verbose') else
            'Downloading %s player %s' % (player_type, player_id)
        if player_type == 'js':
            code = self._download_webpage(
                player_url, video_id,
                errnote='Download of %s failed' % player_url)
            res = self._parse_sig_js(code)
        elif player_type == 'swf':
            urlh = self._request_webpage(
                player_url, video_id,
                errnote='Download of %s failed' % player_url)
            res = self._parse_sig_swf(code)
            assert False, 'Invalid player type %r' % player_type
        # Probe the deciphering function with a string of distinct characters
        # to record the permutation it performs, then persist that spec.
        test_string = ''.join(map(compat_chr, range(len(example_sig))))
        cache_res = res(test_string)
        cache_spec = [ord(c) for c in cache_res]
        self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
    def _print_sig_code(self, func, example_sig):
        # Debug helper: reconstruct readable Python source equivalent to the
        # extracted signature function and print it (see the
        # 'youtube_print_sig_code' option checked in _decrypt_signature).
        def gen_sig_code(idxs):
            def _genslice(start, end, step):
                # Render a Python slice expression, omitting defaults.
                starts = '' if start == 0 else str(start)
                ends = (':%d' % (end + step)) if end + step >= 0 else ':'
                steps = '' if step == 1 else (':%d' % step)
                return 's[%s%s%s]' % (starts, ends, steps)
            # Quelch pyflakes warnings - start will be set when step is set
            start = '(Never used)'
            # Walk consecutive index pairs, coalescing arithmetic runs into
            # slice expressions and emitting single-index accesses otherwise.
            for i, prev in zip(idxs[1:], idxs[:-1]):
                if step is not None:
                    if i - prev == step:
                        yield _genslice(start, prev, step)
                if i - prev in [-1, 1]:
                    yield 's[%d]' % prev
                yield _genslice(start, i, step)
        # Record the permutation the function performs on a probe string of
        # distinct characters, then pretty-print it as index expressions.
        test_string = ''.join(map(compat_chr, range(len(example_sig))))
        cache_res = func(test_string)
        cache_spec = [ord(c) for c in cache_res]
        expr_code = ' + '.join(gen_sig_code(cache_spec))
        signature_id_tuple = '(%s)' % (
            ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
        code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
                ' return %s\n') % (signature_id_tuple, expr_code)
        self.to_screen('Extracted signature function:\n' + code)
1329 def _parse_sig_js(self, jscode):
1330 funcname = self._search_regex(
1331 (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1332 r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1333 r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
1335 r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1336 r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
1337 r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1338 r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1339 r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1340 r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1341 r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1342 r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
1343 jscode, 'Initial JS player signature function name', group='sig')
1345 jsi = JSInterpreter(jscode)
1346 initial_function = jsi.extract_function(funcname)
1347 return lambda s: initial_function([s])
1349 def _parse_sig_swf(self, file_contents):
1350 swfi = SWFInterpreter(file_contents)
1351 TARGET_CLASSNAME = 'SignatureDecipher'
1352 searched_class = swfi.extract_class(TARGET_CLASSNAME)
1353 initial_function = swfi.extract_function(searched_class, 'decipher')
1354 return lambda s: initial_function([s])
    def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
        """Turn the encrypted s field into a working signature"""
        if player_url is None:
            raise ExtractorError('Cannot decrypt signature without player_url')
        # Normalize protocol-relative or path-only player URLs to absolute.
        if player_url.startswith('//'):
            player_url = 'https:' + player_url
        elif not re.match(r'https?://', player_url):
            player_url = compat_urlparse.urljoin(
                'https://www.youtube.com', player_url)
            # Cache key combines the player and the signature's layout, since
            # different layouts may decipher differently.
            player_id = (player_url, self._signature_cache_id(s))
            if player_id not in self._player_cache:
                func = self._extract_signature_function(
                    video_id, player_url, s
                self._player_cache[player_id] = func
            func = self._player_cache[player_id]
            if self._downloader.params.get('youtube_print_sig_code'):
                self._print_sig_code(func, s)
        except Exception as e:
            # Surface the full traceback; signature extraction breaks whenever
            # YouTube changes the player code.
            tb = traceback.format_exc()
            raise ExtractorError(
                'Signature extraction failed: ' + tb, cause=e)
    def _get_subtitles(self, video_id, webpage):
        # Fetch the list of manually-created subtitle tracks from the legacy
        # timedtext API and build a {lang_code: [format dicts]} mapping.
            subs_doc = self._download_xml(
                'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
                video_id, note=False)
        except ExtractorError as err:
            self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err))
        for track in subs_doc.findall('track'):
            lang = track.attrib['lang_code']
            if lang in sub_lang_list:
            # One entry per supported subtitle format for this language.
            for ext in self._SUBTITLE_FORMATS:
                params = compat_urllib_parse_urlencode({
                    # Track names may contain non-ASCII text, hence the
                    # explicit utf-8 encode before urlencoding.
                    'name': track.attrib['name'].encode('utf-8'),
                sub_formats.append({
                    'url': 'https://www.youtube.com/api/timedtext?' + params,
            sub_lang_list[lang] = sub_formats
        if not sub_lang_list:
            self._downloader.report_warning('video doesn\'t have subtitles')
        return sub_lang_list
    def _get_ytplayer_config(self, video_id, webpage):
        # Locate the inline ytplayer.config JSON blob in the watch page and
        # parse it; yields None when the blob is absent or unparseable.
            # User data may contain arbitrary character sequences that may affect
            # JSON extraction with regex, e.g. when '};' is contained the second
            # regex won't capture the whole JSON. Yet working around by trying more
            # concrete regex first keeping in mind proper quoted string handling
            # to be implemented in future that will replace this workaround (see
            # https://github.com/ytdl-org/youtube-dl/issues/7468,
            # https://github.com/ytdl-org/youtube-dl/pull/7599)
            r';ytplayer\.config\s*=\s*({.+?});ytplayer',
            r';ytplayer\.config\s*=\s*({.+?});',
        config = self._search_regex(
            patterns, webpage, 'ytplayer.config', default=None)
            # uppercase_escape decodes \UXXXXXXXX-style escapes before the
            # JSON parse.
            return self._parse_json(
                uppercase_escape(config), video_id, fatal=False)
    def _get_automatic_captions(self, video_id, webpage):
        """We need the webpage for getting the captions url, pass it as an
        argument to speed up the process."""
        self.to_screen('%s: Looking for automatic captions' % video_id)
        player_config = self._get_ytplayer_config(video_id, webpage)
        err_msg = 'Couldn\'t find automatic captions for %s' % video_id
        if not player_config:
            self._downloader.report_warning(err_msg)
            # Legacy flow: a 'ttsurl' in the player args points at the
            # timedtext service listing available auto-caption targets.
            args = player_config['args']
            caption_url = args.get('ttsurl')
                timestamp = args['timestamp']
                # We get the available subtitles
                list_params = compat_urllib_parse_urlencode({
                list_url = caption_url + '&' + list_params
                caption_list = self._download_xml(list_url, video_id)
                original_lang_node = caption_list.find('track')
                if original_lang_node is None:
                    self._downloader.report_warning('Video doesn\'t have automatic captions')
                original_lang = original_lang_node.attrib['lang_code']
                caption_kind = original_lang_node.attrib.get('kind', '')
                # One URL per (target language, subtitle format) pair.
                for lang_node in caption_list.findall('target'):
                    sub_lang = lang_node.attrib['lang_code']
                    for ext in self._SUBTITLE_FORMATS:
                        params = compat_urllib_parse_urlencode({
                            'lang': original_lang,
                            'kind': caption_kind,
                        sub_formats.append({
                            'url': caption_url + '&' + params,
                    sub_lang_list[sub_lang] = sub_formats
                return sub_lang_list

            def make_captions(sub_url, sub_langs):
                # Derive per-language caption URLs by rewriting the query
                # string of a single base caption URL.
                parsed_sub_url = compat_urllib_parse_urlparse(sub_url)
                caption_qs = compat_parse_qs(parsed_sub_url.query)
                for sub_lang in sub_langs:
                    for ext in self._SUBTITLE_FORMATS:
                            'tlang': [sub_lang],
                        sub_url = compat_urlparse.urlunparse(parsed_sub_url._replace(
                            query=compat_urllib_parse_urlencode(caption_qs, True)))
                        sub_formats.append({
                    captions[sub_lang] = sub_formats

            # New captions format as of 22.06.2017
            player_response = args.get('player_response')
            if player_response and isinstance(player_response, compat_str):
                player_response = self._parse_json(
                    player_response, video_id, fatal=False)
                renderer = player_response['captions']['playerCaptionsTracklistRenderer']
                base_url = renderer['captionTracks'][0]['baseUrl']
                for lang in renderer['translationLanguages']:
                    lang_code = lang.get('languageCode')
                        sub_lang_list.append(lang_code)
                return make_captions(base_url, sub_lang_list)
            # Some videos don't provide ttsurl but rather caption_tracks and
            # caption_translation_languages (e.g. 20LmZk1hakA)
            # Does not used anymore as of 22.06.2017
            caption_tracks = args['caption_tracks']
            caption_translation_languages = args['caption_translation_languages']
            caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
            for lang in caption_translation_languages.split(','):
                lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang))
                sub_lang = lang_qs.get('lc', [None])[0]
                    sub_lang_list.append(sub_lang)
            return make_captions(caption_url, sub_lang_list)
        # An extractor error can be raise by the download process if there are
        # no automatic captions but there are subtitles
        except (KeyError, IndexError, ExtractorError):
            self._downloader.report_warning(err_msg)
    def _mark_watched(self, video_id, video_info, player_response):
        # Report the video as watched by requesting its playback-tracking
        # URL; best-effort, failures only warn (fatal=False below).
        playback_url = url_or_none(try_get(
            lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']) or try_get(
            video_info, lambda x: x['videostats_playback_base_url'][0]))
        if not playback_url:
        parsed_playback_url = compat_urlparse.urlparse(playback_url)
        qs = compat_urlparse.parse_qs(parsed_playback_url.query)
        # cpn generation algorithm is reverse engineered from base.js.
        # In fact it works even with dummy cpn.
        CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
        # 16 characters drawn from the 64-symbol alphabet; '& 63' keeps the
        # index in range even though randint may return up to 256.
        cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
        playback_url = compat_urlparse.urlunparse(
            parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
        self._download_webpage(
            playback_url, video_id, 'Marking watched',
            'Unable to mark watched', fatal=False)
    def _extract_urls(webpage):
        # Collect URLs/ids of all YouTube players embedded in an arbitrary
        # webpage, covering several known embedding mechanisms.
        # Embedded YouTube player
            unescapeHTML(mobj.group('url'))
            for mobj in re.finditer(r'''(?x)
                 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
                 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        # lazyYT YouTube embed
        entries.extend(list(map(
            re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))
        # Wordpress "YouTube Video Importer" plugin
        matches = re.findall(r'''(?x)<div[^>]+
            class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
            data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
        # The video id is the last captured group of each match.
        entries.extend(m[-1] for m in matches)
1594 def _extract_url(webpage):
1595 urls = YoutubeIE._extract_urls(webpage)
1596 return urls[0] if urls else None
    def extract_id(cls, url):
        # Parse the video id out of a supported YouTube URL.
        mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
            raise ExtractorError('Invalid URL: %s' % url)
        # Group 2 of _VALID_URL captures the video id — the pattern itself is
        # defined outside this view; confirm the group index against it.
        video_id = mobj.group(2)
    def _extract_chapters(description, duration):
        # Parse chapter markers (timestamp seek-links) out of the HTML video
        # description and build a list of {start_time, end_time, title}
        # dicts, discarding markers inconsistent with the video duration.
        chapter_lines = re.findall(
            r'(?:^|<br\s*/>)([^<]*<a[^>]+onclick=["\']yt\.www\.watch\.player\.seekTo[^>]+>(\d{1,2}:\d{1,2}(?::\d{1,2})?)</a>[^>]*)(?=$|<br\s*/>)',
        if not chapter_lines:
        for next_num, (chapter_line, time_point) in enumerate(
                chapter_lines, start=1):
            start_time = parse_duration(time_point)
            if start_time is None:
            # Ignore markers pointing beyond the end of the video.
            if start_time > duration:
            # A chapter ends where the next one starts (or at video end).
            end_time = (duration if next_num == len(chapter_lines)
                        else parse_duration(chapter_lines[next_num][1]))
            if end_time is None:
            if end_time > duration:
            if start_time > end_time:
            # Title is the line with the timestamp link removed and
            # surrounding dashes/whitespace stripped, whitespace collapsed.
            chapter_title = re.sub(
                r'<a[^>]+>[^<]+</a>', '', chapter_line).strip(' \t-')
            chapter_title = re.sub(r'\s+', ' ', chapter_title)
                'start_time': start_time,
                'end_time': end_time,
                'title': chapter_title,
1641 def _real_extract(self, url):
1642 url, smuggled_data = unsmuggle_url(url, {})
1645 'http' if self._downloader.params.get('prefer_insecure', False)
1650 parsed_url = compat_urllib_parse_urlparse(url)
1651 for component in [parsed_url.fragment, parsed_url.query]:
1652 query = compat_parse_qs(component)
1653 if start_time is None and 't' in query:
1654 start_time = parse_duration(query['t'][0])
1655 if start_time is None and 'start' in query:
1656 start_time = parse_duration(query['start'][0])
1657 if end_time is None and 'end' in query:
1658 end_time = parse_duration(query['end'][0])
1660 # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1661 mobj = re.search(self._NEXT_URL_RE, url)
1663 url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
1664 video_id = self.extract_id(url)
1667 url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
1668 video_webpage = self._download_webpage(url, video_id)
1670 # Attempt to extract SWF player URL
1671 mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
1672 if mobj is not None:
1673 player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1679 def add_dash_mpd(video_info):
1680 dash_mpd = video_info.get('dashmpd')
1681 if dash_mpd and dash_mpd[0] not in dash_mpds:
1682 dash_mpds.append(dash_mpd[0])
1684 def add_dash_mpd_pr(pl_response):
1685 dash_mpd = url_or_none(try_get(
1686 pl_response, lambda x: x['streamingData']['dashManifestUrl'],
1688 if dash_mpd and dash_mpd not in dash_mpds:
1689 dash_mpds.append(dash_mpd)
1694 def extract_view_count(v_info):
1695 return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
1697 def extract_token(v_info):
1698 return dict_get(v_info, ('account_playback_token', 'accountPlaybackToken', 'token'))
1700 def extract_player_response(player_response, video_id):
1701 pl_response = str_or_none(player_response)
1704 pl_response = self._parse_json(pl_response, video_id, fatal=False)
1705 if isinstance(pl_response, dict):
1706 add_dash_mpd_pr(pl_response)
1709 player_response = {}
1712 embed_webpage = None
1713 if re.search(r'player-age-gate-content">', video_webpage) is not None:
1715 # We simulate the access to the video from www.youtube.com/v/{video_id}
1716 # this can be viewed without login into Youtube
1717 url = proto + '://www.youtube.com/embed/%s' % video_id
1718 embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
1719 data = compat_urllib_parse_urlencode({
1720 'video_id': video_id,
1721 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1722 'sts': self._search_regex(
1723 r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
1725 video_info_url = proto + '://www.youtube.com/get_video_info?' + data
1726 video_info_webpage = self._download_webpage(
1727 video_info_url, video_id,
1728 note='Refetching age-gated info webpage',
1729 errnote='unable to download video info webpage')
1730 video_info = compat_parse_qs(video_info_webpage)
1731 pl_response = video_info.get('player_response', [None])[0]
1732 player_response = extract_player_response(pl_response, video_id)
1733 add_dash_mpd(video_info)
1734 view_count = extract_view_count(video_info)
1739 # Try looking directly into the video webpage
1740 ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
1742 args = ytplayer_config['args']
1743 if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
1744 # Convert to the same format returned by compat_parse_qs
1745 video_info = dict((k, [v]) for k, v in args.items())
1746 add_dash_mpd(video_info)
1747 # Rental video is not rented but preview is available (e.g.
1748 # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
1749 # https://github.com/ytdl-org/youtube-dl/issues/10532)
1750 if not video_info and args.get('ypc_vid'):
1751 return self.url_result(
1752 args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
1753 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1755 sts = ytplayer_config.get('sts')
1756 if not player_response:
1757 player_response = extract_player_response(args.get('player_response'), video_id)
1758 if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
1759 add_dash_mpd_pr(player_response)
1760 # We also try looking in get_video_info since it may contain different dashmpd
1761 # URL that points to a DASH manifest with possibly different itag set (some itags
1762 # are missing from DASH manifest pointed by webpage's dashmpd, some - from DASH
1763 # manifest pointed by get_video_info's dashmpd).
1764 # The general idea is to take a union of itags of both DASH manifests (for example
1765 # video with such 'manifest behavior' see https://github.com/ytdl-org/youtube-dl/issues/6093)
1766 self.report_video_info_webpage_download(video_id)
1767 for el in ('embedded', 'detailpage', 'vevo', ''):
1769 'video_id': video_id,
1779 video_info_webpage = self._download_webpage(
1780 '%s://www.youtube.com/get_video_info' % proto,
1781 video_id, note=False,
1782 errnote='unable to download video info webpage',
1783 fatal=False, query=query)
1784 if not video_info_webpage:
1786 get_video_info = compat_parse_qs(video_info_webpage)
1787 if not player_response:
1788 pl_response = get_video_info.get('player_response', [None])[0]
1789 player_response = extract_player_response(pl_response, video_id)
1790 add_dash_mpd(get_video_info)
1791 if view_count is None:
1792 view_count = extract_view_count(get_video_info)
1794 video_info = get_video_info
1795 get_token = extract_token(get_video_info)
1797 # Different get_video_info requests may report different results, e.g.
1798 # some may report video unavailability, but some may serve it without
1799 # any complaint (see https://github.com/ytdl-org/youtube-dl/issues/7362,
1800 # the original webpage as well as el=info and el=embedded get_video_info
1801 # requests report video unavailability due to geo restriction while
1802 # el=detailpage succeeds and returns valid data). This is probably
1803 # due to YouTube measures against IP ranges of hosting providers.
1804 # Working around by preferring the first succeeded video_info containing
1805 # the token if no such video_info yet was found.
1806 token = extract_token(video_info)
1808 video_info = get_video_info
1811 def extract_unavailable_message():
1812 return self._html_search_regex(
1813 (r'(?s)<div[^>]+id=["\']unavailable-submessage["\'][^>]+>(.+?)</div',
1814 r'(?s)<h1[^>]+id=["\']unavailable-message["\'][^>]*>(.+?)</h1>'),
1815 video_webpage, 'unavailable message', default=None)
1818 unavailable_message = extract_unavailable_message()
1819 if not unavailable_message:
1820 unavailable_message = 'Unable to extract video data'
1821 raise ExtractorError(
1822 'YouTube said: %s' % unavailable_message, expected=True, video_id=video_id)
1824 video_details = try_get(
1825 player_response, lambda x: x['videoDetails'], dict) or {}
1827 video_title = video_info.get('title', [None])[0] or video_details.get('title')
1829 self._downloader.report_warning('Unable to extract video title')
1832 description_original = video_description = get_element_by_id("eow-description", video_webpage)
1833 if video_description:
1836 redir_url = compat_urlparse.urljoin(url, m.group(1))
1837 parsed_redir_url = compat_urllib_parse_urlparse(redir_url)
1838 if re.search(r'^(?:www\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)$', parsed_redir_url.netloc) and parsed_redir_url.path == '/redirect':
1839 qs = compat_parse_qs(parsed_redir_url.query)
1845 description_original = video_description = re.sub(r'''(?x)
1847 (?:[a-zA-Z-]+="[^"]*"\s+)*?
1848 (?:title|href)="([^"]+)"\s+
1849 (?:[a-zA-Z-]+="[^"]*"\s+)*?
1853 ''', replace_url, video_description)
1854 video_description = clean_html(video_description)
1856 video_description = self._html_search_meta('description', video_webpage) or video_details.get('shortDescription')
1858 if not smuggled_data.get('force_singlefeed', False):
1859 if not self._downloader.params.get('noplaylist'):
1860 multifeed_metadata_list = try_get(
1862 lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
1863 compat_str) or try_get(
1864 video_info, lambda x: x['multifeed_metadata_list'][0], compat_str)
1865 if multifeed_metadata_list:
1868 for feed in multifeed_metadata_list.split(','):
1869 # Unquote should take place before split on comma (,) since textual
1870 # fields may contain comma as well (see
1871 # https://github.com/ytdl-org/youtube-dl/issues/8536)
1872 feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
1874 '_type': 'url_transparent',
1875 'ie_key': 'Youtube',
1877 '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
1878 {'force_singlefeed': True}),
1879 'title': '%s (%s)' % (video_title, feed_data['title'][0]),
1881 feed_ids.append(feed_data['id'][0])
1883 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
1884 % (', '.join(feed_ids), video_id))
1885 return self.playlist_result(entries, video_id, video_title, video_description)
1887 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
1889 if view_count is None:
1890 view_count = extract_view_count(video_info)
1891 if view_count is None and video_details:
1892 view_count = int_or_none(video_details.get('viewCount'))
1895 is_live = bool_or_none(video_details.get('isLive'))
1897 # Check for "rental" videos
1898 if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
1899 raise ExtractorError('"rental" videos not supported. See https://github.com/ytdl-org/youtube-dl/issues/359 for more information.', expected=True)
1901 def _extract_filesize(media_url):
1902 return int_or_none(self._search_regex(
1903 r'\bclen[=/](\d+)', media_url, 'filesize', default=None))
1905 if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1906 self.report_rtmp_download()
1908 'format_id': '_rtmp',
1910 'url': video_info['conn'][0],
1911 'player_url': player_url,
1913 elif not is_live and (len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1):
1914 encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
1915 if 'rtmpe%3Dyes' in encoded_url_map:
1916 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/ytdl-org/youtube-dl/issues/343 for more information.', expected=True)
1918 fmt_list = video_info.get('fmt_list', [''])[0]
1920 for fmt in fmt_list.split(','):
1921 spec = fmt.split('/')
1923 width_height = spec[1].split('x')
1924 if len(width_height) == 2:
1925 formats_spec[spec[0]] = {
1926 'resolution': spec[1],
1927 'width': int_or_none(width_height[0]),
1928 'height': int_or_none(width_height[1]),
1930 q = qualities(['small', 'medium', 'hd720'])
1931 streaming_formats = try_get(player_response, lambda x: x['streamingData']['formats'], list)
1932 if streaming_formats:
1933 for fmt in streaming_formats:
1934 itag = str_or_none(fmt.get('itag'))
1937 quality = fmt.get('quality')
1938 quality_label = fmt.get('qualityLabel') or quality
1939 formats_spec[itag] = {
1940 'asr': int_or_none(fmt.get('audioSampleRate')),
1941 'filesize': int_or_none(fmt.get('contentLength')),
1942 'format_note': quality_label,
1943 'fps': int_or_none(fmt.get('fps')),
1944 'height': int_or_none(fmt.get('height')),
1945 'quality': q(quality),
1946 # bitrate for itag 43 is always 2147483647
1947 'tbr': float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000) if itag != '43' else None,
1948 'width': int_or_none(fmt.get('width')),
1951 for url_data_str in encoded_url_map.split(','):
1952 url_data = compat_parse_qs(url_data_str)
1953 if 'itag' not in url_data or 'url' not in url_data or url_data.get('drm_families'):
1955 stream_type = int_or_none(try_get(url_data, lambda x: x['stream_type'][0]))
1956 # Unsupported FORMAT_STREAM_TYPE_OTF
1957 if stream_type == 3:
1959 format_id = url_data['itag'][0]
1960 url = url_data['url'][0]
1962 if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
1963 ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
1964 jsplayer_url_json = self._search_regex(
1966 embed_webpage if age_gate else video_webpage,
1967 'JS player URL (1)', default=None)
1968 if not jsplayer_url_json and not age_gate:
1969 # We need the embed website after all
1970 if embed_webpage is None:
1971 embed_url = proto + '://www.youtube.com/embed/%s' % video_id
1972 embed_webpage = self._download_webpage(
1973 embed_url, video_id, 'Downloading embed webpage')
1974 jsplayer_url_json = self._search_regex(
1975 ASSETS_RE, embed_webpage, 'JS player URL')
1977 player_url = json.loads(jsplayer_url_json)
1978 if player_url is None:
1979 player_url_json = self._search_regex(
1980 r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
1981 video_webpage, 'age gate player URL')
1982 player_url = json.loads(player_url_json)
1984 if 'sig' in url_data:
1985 url += '&signature=' + url_data['sig'][0]
1986 elif 's' in url_data:
1987 encrypted_sig = url_data['s'][0]
1989 if self._downloader.params.get('verbose'):
1990 if player_url is None:
1991 player_version = 'unknown'
1992 player_desc = 'unknown'
1994 if player_url.endswith('swf'):
1995 player_version = self._search_regex(
1996 r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
1997 'flash player', fatal=False)
1998 player_desc = 'flash player %s' % player_version
2000 player_version = self._search_regex(
2001 [r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js',
2002 r'(?:www|player(?:_ias)?)-([^/]+)(?:/[a-z]{2,3}_[A-Z]{2})?/base\.js'],
2004 'html5 player', fatal=False)
2005 player_desc = 'html5 player %s' % player_version
2007 parts_sizes = self._signature_cache_id(encrypted_sig)
2008 self.to_screen('{%s} signature length %s, %s' %
2009 (format_id, parts_sizes, player_desc))
2011 signature = self._decrypt_signature(
2012 encrypted_sig, video_id, player_url, age_gate)
2013 sp = try_get(url_data, lambda x: x['sp'][0], compat_str) or 'signature'
2014 url += '&%s=%s' % (sp, signature)
2015 if 'ratebypass' not in url:
2016 url += '&ratebypass=yes'
2019 'format_id': format_id,
2021 'player_url': player_url,
2023 if format_id in self._formats:
2024 dct.update(self._formats[format_id])
2025 if format_id in formats_spec:
2026 dct.update(formats_spec[format_id])
2028 # Some itags are not included in DASH manifest thus corresponding formats will
2029 # lack metadata (see https://github.com/ytdl-org/youtube-dl/pull/5993).
2030 # Trying to extract metadata from url_encoded_fmt_stream_map entry.
2031 mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
2032 width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
2034 filesize = int_or_none(url_data.get(
2035 'clen', [None])[0]) or _extract_filesize(url)
2037 quality = url_data.get('quality', [None])[0]
2040 'filesize': filesize,
2041 'tbr': float_or_none(url_data.get('bitrate', [None])[0], 1000),
2044 'fps': int_or_none(url_data.get('fps', [None])[0]),
2045 'format_note': url_data.get('quality_label', [None])[0] or quality,
2046 'quality': q(quality),
2048 for key, value in more_fields.items():
2051 type_ = url_data.get('type', [None])[0]
2053 type_split = type_.split(';')
2054 kind_ext = type_split[0].split('/')
2055 if len(kind_ext) == 2:
2057 dct['ext'] = mimetype2ext(type_split[0])
2058 if kind in ('audio', 'video'):
2060 for mobj in re.finditer(
2061 r'(?P<key>[a-zA-Z_-]+)=(?P<quote>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):
2062 if mobj.group('key') == 'codecs':
2063 codecs = mobj.group('val')
2066 dct.update(parse_codecs(codecs))
2067 if dct.get('acodec') == 'none' or dct.get('vcodec') == 'none':
2068 dct['downloader_options'] = {
2069 # Youtube throttles chunks >~10M
2070 'http_chunk_size': 10485760,
2075 url_or_none(try_get(
2077 lambda x: x['streamingData']['hlsManifestUrl'],
2079 or url_or_none(try_get(
2080 video_info, lambda x: x['hlsvp'][0], compat_str)))
2083 m3u8_formats = self._extract_m3u8_formats(
2084 manifest_url, video_id, 'mp4', fatal=False)
2085 for a_format in m3u8_formats:
2086 itag = self._search_regex(
2087 r'/itag/(\d+)/', a_format['url'], 'itag', default=None)
2089 a_format['format_id'] = itag
2090 if itag in self._formats:
2091 dct = self._formats[itag].copy()
2092 dct.update(a_format)
2094 a_format['player_url'] = player_url
2095 # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
2096 a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
2097 formats.append(a_format)
2099 error_message = extract_unavailable_message()
2100 if not error_message:
2101 error_message = clean_html(try_get(
2102 player_response, lambda x: x['playabilityStatus']['reason'],
2104 if not error_message:
2105 error_message = clean_html(
2106 try_get(video_info, lambda x: x['reason'][0], compat_str))
2108 raise ExtractorError(error_message, expected=True)
2109 raise ExtractorError('no conn, hlsvp, hlsManifestUrl or url_encoded_fmt_stream_map information found in video info')
2112 video_uploader = try_get(
2113 video_info, lambda x: x['author'][0],
2114 compat_str) or str_or_none(video_details.get('author'))
2116 video_uploader = compat_urllib_parse_unquote_plus(video_uploader)
2118 self._downloader.report_warning('unable to extract uploader name')
2121 video_uploader_id = None
2122 video_uploader_url = None
2124 r'<link itemprop="url" href="(?P<uploader_url>https?://www\.youtube\.com/(?:user|channel)/(?P<uploader_id>[^"]+))">',
2126 if mobj is not None:
2127 video_uploader_id = mobj.group('uploader_id')
2128 video_uploader_url = mobj.group('uploader_url')
2130 self._downloader.report_warning('unable to extract uploader nickname')
2133 str_or_none(video_details.get('channelId'))
2134 or self._html_search_meta(
2135 'channelId', video_webpage, 'channel id', default=None)
2136 or self._search_regex(
2137 r'data-channel-external-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
2138 video_webpage, 'channel id', default=None, group='id'))
2139 channel_url = 'http://www.youtube.com/channel/%s' % channel_id if channel_id else None
2142 # We try first to get a high quality image:
2143 m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
2144 video_webpage, re.DOTALL)
2145 if m_thumb is not None:
2146 video_thumbnail = m_thumb.group(1)
2147 elif 'thumbnail_url' not in video_info:
2148 self._downloader.report_warning('unable to extract video thumbnail')
2149 video_thumbnail = None
2150 else: # don't panic if we can't find it
2151 video_thumbnail = compat_urllib_parse_unquote_plus(video_info['thumbnail_url'][0])
2154 upload_date = self._html_search_meta(
2155 'datePublished', video_webpage, 'upload date', default=None)
2157 upload_date = self._search_regex(
2158 [r'(?s)id="eow-date.*?>(.*?)</span>',
2159 r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'],
2160 video_webpage, 'upload date', default=None)
2161 upload_date = unified_strdate(upload_date)
2163 video_license = self._html_search_regex(
2164 r'<h4[^>]+class="title"[^>]*>\s*License\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li',
2165 video_webpage, 'license', default=None)
2167 m_music = re.search(
2169 <h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*
2177 \bhref=["\']/red[^>]*>| # drop possible
2178 >\s*Listen ad-free with YouTube Red # YouTube Red ad
2185 video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
2186 video_creator = clean_html(m_music.group('creator'))
2188 video_alt_title = video_creator = None
def extract_meta(field):
    # Grab the first list item of the watch-page metadata section whose
    # <h4> title equals `field` (e.g. 'Song', 'Artist', 'Album').
    meta_re = (
        r'<h4[^>]+class="title"[^>]*>\s*%s\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li>\s*'
        % field)
    return self._html_search_regex(meta_re, video_webpage, field, default=None)
2195 track = extract_meta('Song')
2196 artist = extract_meta('Artist')
2197 album = extract_meta('Album')
2199 # Youtube Music Auto-generated description
2200 release_date = release_year = None
2201 if video_description:
2202 mobj = re.search(r'(?s)Provided to YouTube by [^\n]+\n+(?P<track>[^·]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?', video_description)
2205 track = mobj.group('track').strip()
2207 artist = mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·'))
# Fix: strip the *value* of the 'album' group; the original called
# 'album'.strip() (a no-op on the group name), leaving surrounding
# whitespace on the album — inconsistent with how `track` is handled.
album = mobj.group('album').strip()
2210 release_year = mobj.group('release_year')
2211 release_date = mobj.group('release_date')
2213 release_date = release_date.replace('-', '')
2214 if not release_year:
2215 release_year = int(release_date[:4])
2217 release_year = int(release_year)
2219 m_episode = re.search(
2220 r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
2223 series = unescapeHTML(m_episode.group('series'))
2224 season_number = int(m_episode.group('season'))
2225 episode_number = int(m_episode.group('episode'))
2227 series = season_number = episode_number = None
2229 m_cat_container = self._search_regex(
2230 r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
2231 video_webpage, 'categories', default=None)
2233 category = self._html_search_regex(
2234 r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
2236 video_categories = None if category is None else [category]
2238 video_categories = None
2241 unescapeHTML(m.group('content'))
2242 for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
def _extract_count(count_name):
    # Read a sentiment-bar button total ('like'/'dislike') from the watch page.
    button_re = (
        r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>'
        % re.escape(count_name))
    raw_count = self._search_regex(
        button_re, video_webpage, count_name, default=None)
    return str_to_int(raw_count)
2250 like_count = _extract_count('like')
2251 dislike_count = _extract_count('dislike')
2253 if view_count is None:
2254 view_count = str_to_int(self._search_regex(
2255 r'<[^>]+class=["\']watch-view-count[^>]+>\s*([\d,\s]+)', video_webpage,
2256 'view count', default=None))
2259 float_or_none(video_details.get('averageRating'))
2260 or try_get(video_info, lambda x: float_or_none(x['avg_rating'][0])))
2263 video_subtitles = self.extract_subtitles(video_id, video_webpage)
2264 automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
2266 video_duration = try_get(
2267 video_info, lambda x: int_or_none(x['length_seconds'][0]))
2268 if not video_duration:
2269 video_duration = int_or_none(video_details.get('lengthSeconds'))
2270 if not video_duration:
2271 video_duration = parse_duration(self._html_search_meta(
2272 'duration', video_webpage, 'video duration'))
2275 video_annotations = None
2276 if self._downloader.params.get('writeannotations', False):
2277 xsrf_token = self._search_regex(
2278 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>[A-Za-z0-9+/=]+)\2',
2279 video_webpage, 'xsrf token', group='xsrf_token', fatal=False)
2280 invideo_url = try_get(
2281 player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
2282 if xsrf_token and invideo_url:
2283 xsrf_field_name = self._search_regex(
2284 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
2285 video_webpage, 'xsrf field name',
2286 group='xsrf_field_name', default='session_token')
2287 video_annotations = self._download_webpage(
2288 self._proto_relative_url(invideo_url),
2289 video_id, note='Downloading annotations',
2290 errnote='Unable to download video annotations', fatal=False,
2291 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
2293 chapters = self._extract_chapters(description_original, video_duration)
2295 # Look for the DASH manifest
2296 if self._downloader.params.get('youtube_include_dash_manifest', True):
2297 dash_mpd_fatal = True
2298 for mpd_url in dash_mpds:
2301 def decrypt_sig(mobj):
2303 dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
2304 return '/signature/%s' % dec_s
2306 mpd_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, mpd_url)
2308 for df in self._extract_mpd_formats(
2309 mpd_url, video_id, fatal=dash_mpd_fatal,
2310 formats_dict=self._formats):
2311 if not df.get('filesize'):
2312 df['filesize'] = _extract_filesize(df['url'])
2313 # Do not overwrite DASH format found in some previous DASH manifest
2314 if df['format_id'] not in dash_formats:
2315 dash_formats[df['format_id']] = df
2316 # Additional DASH manifests may end up in HTTP Error 403 therefore
2317 # allow them to fail without bug report message if we already have
2318 # some DASH manifest succeeded. This is temporary workaround to reduce
2319 # burst of bug reports until we figure out the reason and whether it
2320 # can be fixed at all.
2321 dash_mpd_fatal = False
2322 except (ExtractorError, KeyError) as e:
2323 self.report_warning(
2324 'Skipping DASH manifest: %r' % e, video_id)
2326 # Remove the formats we found through non-DASH, they
2327 # contain less info and it can be wrong, because we use
2328 # fixed values (for example the resolution). See
2329 # https://github.com/ytdl-org/youtube-dl/issues/5774 for an
2331 formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
2332 formats.extend(dash_formats.values())
2334 # Check for malformed aspect ratio
2335 stretched_m = re.search(
2336 r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
2339 w = float(stretched_m.group('w'))
2340 h = float(stretched_m.group('h'))
2341 # yt:stretch may hold invalid ratio data (e.g. for Q39EVAstoRM ratio is 17:0).
2342 # We will only process correct ratios.
2346 if f.get('vcodec') != 'none':
2347 f['stretched_ratio'] = ratio
2350 token = extract_token(video_info)
2352 if 'reason' in video_info:
2353 if 'The uploader has not made this video available in your country.' in video_info['reason']:
2354 regions_allowed = self._html_search_meta(
2355 'regionsAllowed', video_webpage, default=None)
2356 countries = regions_allowed.split(',') if regions_allowed else None
2357 self.raise_geo_restricted(
2358 msg=video_info['reason'][0], countries=countries)
2359 reason = video_info['reason'][0]
2360 if 'Invalid parameters' in reason:
2361 unavailable_message = extract_unavailable_message()
2362 if unavailable_message:
2363 reason = unavailable_message
2364 raise ExtractorError(
2365 'YouTube said: %s' % reason,
2366 expected=True, video_id=video_id)
2368 raise ExtractorError(
2369 '"token" parameter not in video info for unknown reason',
2372 if not formats and (video_info.get('license_info') or try_get(player_response, lambda x: x['streamingData']['licenseInfos'])):
2373 raise ExtractorError('This video is DRM protected.', expected=True)
2375 self._sort_formats(formats)
2377 self.mark_watched(video_id, video_info, player_response)
2381 'uploader': video_uploader,
2382 'uploader_id': video_uploader_id,
2383 'uploader_url': video_uploader_url,
2384 'channel_id': channel_id,
2385 'channel_url': channel_url,
2386 'upload_date': upload_date,
2387 'license': video_license,
2388 'creator': video_creator or artist,
2389 'title': video_title,
2390 'alt_title': video_alt_title or track,
2391 'thumbnail': video_thumbnail,
2392 'description': video_description,
2393 'categories': video_categories,
2395 'subtitles': video_subtitles,
2396 'automatic_captions': automatic_captions,
2397 'duration': video_duration,
2398 'age_limit': 18 if age_gate else 0,
2399 'annotations': video_annotations,
2400 'chapters': chapters,
2401 'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
2402 'view_count': view_count,
2403 'like_count': like_count,
2404 'dislike_count': dislike_count,
2405 'average_rating': average_rating,
2408 'start_time': start_time,
2409 'end_time': end_time,
2411 'season_number': season_number,
2412 'episode_number': episode_number,
2416 'release_date': release_date,
2417 'release_year': release_year,
2421 class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
2422 IE_DESC = 'YouTube.com playlists'
2423 _VALID_URL = r"""(?x)(?:
2433 (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/(?:videoseries|[0-9A-Za-z_-]{11}))
2434 \? (?:.*?[&;])*? (?:p|a|list)=
2437 youtu\.be/[0-9A-Za-z_-]{11}\?.*?\blist=
2440 (?:PL|LL|EC|UU|FL|RD|UL|TL|OLAK5uy_)?[0-9A-Za-z-_]{10,}
2441 # Top tracks, they can also include dots
2447 )""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
2448 _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
2449 _VIDEO_RE_TPL = r'href="\s*/watch\?v=%s(?:&(?:[^"]*?index=(?P<index>\d+))?(?:[^>]+>(?P<title>[^<]+))?)?'
2450 _VIDEO_RE = _VIDEO_RE_TPL % r'(?P<id>[0-9A-Za-z_-]{11})'
2451 IE_NAME = 'youtube:playlist'
2453 'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
2455 'title': 'ytdl test PL',
2456 'id': 'PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
2458 'playlist_count': 3,
2460 'url': 'https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
2462 'id': 'PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
2463 'title': 'YDL_Empty_List',
2465 'playlist_count': 0,
2466 'skip': 'This playlist is private',
2468 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
2469 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2471 'title': '29C3: Not my department',
2472 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2473 'uploader': 'Christiaan008',
2474 'uploader_id': 'ChRiStIaAn008',
2476 'playlist_count': 95,
2478 'note': 'issue #673',
2479 'url': 'PLBB231211A4F62143',
2481 'title': '[OLD]Team Fortress 2 (Class-based LP)',
2482 'id': 'PLBB231211A4F62143',
2483 'uploader': 'Wickydoo',
2484 'uploader_id': 'Wickydoo',
2486 'playlist_mincount': 26,
2488 'note': 'Large playlist',
2489 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
2491 'title': 'Uploads from Cauchemar',
2492 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
2493 'uploader': 'Cauchemar',
2494 'uploader_id': 'Cauchemar89',
2496 'playlist_mincount': 799,
2498 'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
2500 'title': 'YDL_safe_search',
2501 'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
2503 'playlist_count': 2,
2504 'skip': 'This playlist is private',
2507 'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
2508 'playlist_count': 4,
2511 'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
2512 'uploader': 'milan',
2513 'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
2516 'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
2517 'playlist_mincount': 485,
2519 'title': '2018 Chinese New Singles (11/6 updated)',
2520 'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
2522 'uploader_id': 'sdragonfang',
2525 'note': 'Embedded SWF player',
2526 'url': 'https://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
2527 'playlist_count': 4,
2530 'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ',
2532 'skip': 'This playlist does not exist',
2534 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
2535 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
2537 'title': 'Uploads from Interstellar Movie',
2538 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
2539 'uploader': 'Interstellar Movie',
2540 'uploader_id': 'InterstellarMovie1',
2542 'playlist_mincount': 21,
2544 # Playlist URL that does not actually serve a playlist
2545 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
2547 'id': 'FqZTN594JQw',
2549 'title': "Smiley's People 01 detective, Adventure Series, Action",
2550 'uploader': 'STREEM',
2551 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
2552 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
2553 'upload_date': '20150526',
2554 'license': 'Standard YouTube License',
2555 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
2556 'categories': ['People & Blogs'],
2560 'dislike_count': int,
2563 'skip_download': True,
2565 'skip': 'This video is not available.',
2566 'add_ie': [YoutubeIE.ie_key()],
2568 'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
2570 'id': 'yeWKywCrFtk',
2572 'title': 'Small Scale Baler and Braiding Rugs',
2573 'uploader': 'Backus-Page House Museum',
2574 'uploader_id': 'backuspagemuseum',
2575 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
2576 'upload_date': '20161008',
2577 'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
2578 'categories': ['Nonprofits & Activism'],
2581 'dislike_count': int,
2585 'skip_download': True,
2588 # https://github.com/ytdl-org/youtube-dl/issues/21844
2589 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2591 'title': 'Data Analysis with Dr Mike Pound',
2592 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2593 'uploader_id': 'Computerphile',
2594 'uploader': 'Computerphile',
2596 'playlist_mincount': 11,
2598 'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
2599 'only_matching': True,
2601 'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
2602 'only_matching': True,
2604 # music album playlist
2605 'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
2606 'only_matching': True,
2608 'url': 'https://invidio.us/playlist?list=PLDIoUOhQQPlXr63I_vwF9GD8sAKh77dWU',
2609 'only_matching': True,
2612 def _real_initialize(self):
2615 def extract_videos_from_page(self, page):
2619 for item in re.findall(
2620 r'(<[^>]*\bdata-video-id\s*=\s*["\'][0-9A-Za-z_-]{11}[^>]+>)', page):
2621 attrs = extract_attributes(item)
2622 video_id = attrs['data-video-id']
2623 video_title = unescapeHTML(attrs.get('data-title'))
2625 video_title = video_title.strip()
2626 ids_in_page.append(video_id)
2627 titles_in_page.append(video_title)
2629 # Fallback with old _VIDEO_RE
2630 self.extract_videos_from_page_impl(
2631 self._VIDEO_RE, page, ids_in_page, titles_in_page)
2634 self.extract_videos_from_page_impl(
2635 r'href="\s*/watch\?v\s*=\s*(?P<id>[0-9A-Za-z_-]{11})', page,
2636 ids_in_page, titles_in_page)
2637 self.extract_videos_from_page_impl(
2638 r'data-video-ids\s*=\s*["\'](?P<id>[0-9A-Za-z_-]{11})', page,
2639 ids_in_page, titles_in_page)
2641 return zip(ids_in_page, titles_in_page)
2643 def _extract_mix(self, playlist_id):
2644 # The mixes are generated from a single video
2645 # the id of the playlist is just 'RD' + video_id
2647 last_id = playlist_id[-11:]
2648 for n in itertools.count(1):
2649 url = 'https://youtube.com/watch?v=%s&list=%s' % (last_id, playlist_id)
2650 webpage = self._download_webpage(
2651 url, playlist_id, 'Downloading page {0} of Youtube mix'.format(n))
2652 new_ids = orderedSet(re.findall(
2653 r'''(?xs)data-video-username=".*?".*?
2654 href="/watch\?v=([0-9A-Za-z_-]{11})&[^"]*?list=%s''' % re.escape(playlist_id),
2656 # Fetch new pages until all the videos are repeated, it seems that
2657 # there are always 51 unique videos.
2658 new_ids = [_id for _id in new_ids if _id not in ids]
2664 url_results = self._ids_to_results(ids)
2666 search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage)
2668 search_title('playlist-title')
2669 or search_title('title long-title')
2670 or search_title('title'))
2671 title = clean_html(title_span)
2673 return self.playlist_result(url_results, playlist_id, title)
2675 def _extract_playlist(self, playlist_id):
2676 url = self._TEMPLATE_URL % playlist_id
2677 page = self._download_webpage(url, playlist_id)
2679 # the yt-alert-message now has tabindex attribute (see https://github.com/ytdl-org/youtube-dl/issues/11604)
2680 for match in re.findall(r'<div class="yt-alert-message"[^>]*>([^<]+)</div>', page):
2681 match = match.strip()
2682 # Check if the playlist exists or is private
2683 mobj = re.match(r'[^<]*(?:The|This) playlist (?P<reason>does not exist|is private)[^<]*', match)
2685 reason = mobj.group('reason')
2686 message = 'This playlist %s' % reason
2687 if 'private' in reason:
2688 message += ', use --username or --netrc to access it'
2690 raise ExtractorError(message, expected=True)
2691 elif re.match(r'[^<]*Invalid parameters[^<]*', match):
2692 raise ExtractorError(
2693 'Invalid parameters. Maybe URL is incorrect.',
2695 elif re.match(r'[^<]*Choose your language[^<]*', match):
2698 self.report_warning('Youtube gives an alert message: ' + match)
2700 playlist_title = self._html_search_regex(
2701 r'(?s)<h1 class="pl-header-title[^"]*"[^>]*>\s*(.*?)\s*</h1>',
2702 page, 'title', default=None)
2704 _UPLOADER_BASE = r'class=["\']pl-header-details[^>]+>\s*<li>\s*<a[^>]+\bhref='
2705 uploader = self._search_regex(
2706 r'%s["\']/(?:user|channel)/[^>]+>([^<]+)' % _UPLOADER_BASE,
2707 page, 'uploader', default=None)
2709 r'%s(["\'])(?P<path>/(?:user|channel)/(?P<uploader_id>.+?))\1' % _UPLOADER_BASE,
2712 uploader_id = mobj.group('uploader_id')
2713 uploader_url = compat_urlparse.urljoin(url, mobj.group('path'))
2715 uploader_id = uploader_url = None
2719 if not playlist_title:
2721 # Some playlist URLs don't actually serve a playlist (e.g.
2722 # https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4)
2723 next(self._entries(page, playlist_id))
2724 except StopIteration:
2727 playlist = self.playlist_result(
2728 self._entries(page, playlist_id), playlist_id, playlist_title)
2730 'uploader': uploader,
2731 'uploader_id': uploader_id,
2732 'uploader_url': uploader_url,
2735 return has_videos, playlist
2737 def _check_download_just_video(self, url, playlist_id):
2738 # Check if it's a video-specific URL
2739 query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
2740 video_id = query_dict.get('v', [None])[0] or self._search_regex(
2741 r'(?:(?:^|//)youtu\.be/|youtube\.com/embed/(?!videoseries))([0-9A-Za-z_-]{11})', url,
2742 'video id', default=None)
2744 if self._downloader.params.get('noplaylist'):
2745 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
2746 return video_id, self.url_result(video_id, 'Youtube', video_id=video_id)
2748 self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
2749 return video_id, None
2752 def _real_extract(self, url):
2753 # Extract playlist id
2754 mobj = re.match(self._VALID_URL, url)
2756 raise ExtractorError('Invalid URL: %s' % url)
2757 playlist_id = mobj.group(1) or mobj.group(2)
2759 video_id, video = self._check_download_just_video(url, playlist_id)
2763 if playlist_id.startswith(('RD', 'UL', 'PU')):
2764 # Mixes require a custom extraction process
2765 return self._extract_mix(playlist_id)
2767 has_videos, playlist = self._extract_playlist(playlist_id)
2768 if has_videos or not video_id:
2771 # Some playlist URLs don't actually serve a playlist (see
2772 # https://github.com/ytdl-org/youtube-dl/issues/10537).
2773 # Fallback to plain video extraction if there is a video id
2774 # along with playlist id.
2775 return self.url_result(video_id, 'Youtube', video_id=video_id)
2778 class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
2779 IE_DESC = 'YouTube.com channels'
2780 _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com|(?:www\.)?invidio\.us)/channel/(?P<id>[0-9A-Za-z_-]+)'
2781 _TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos'
2782 _VIDEO_RE = r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?'
2783 IE_NAME = 'youtube:channel'
2785 'note': 'paginated channel',
2786 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
2787 'playlist_mincount': 91,
2789 'id': 'UUKfVa3S1e4PHvxWcwyMMg8w',
2790 'title': 'Uploads from lex will',
2791 'uploader': 'lex will',
2792 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2795 'note': 'Age restricted channel',
2796 # from https://www.youtube.com/user/DeusExOfficial
2797 'url': 'https://www.youtube.com/channel/UCs0ifCMCm1icqRbqhUINa0w',
2798 'playlist_mincount': 64,
2800 'id': 'UUs0ifCMCm1icqRbqhUINa0w',
2801 'title': 'Uploads from Deus Ex',
2802 'uploader': 'Deus Ex',
2803 'uploader_id': 'DeusExOfficial',
2806 'url': 'https://invidio.us/channel/UC23qupoDRn9YOAVzeoxjOQA',
2807 'only_matching': True,
def suitable(cls, url):
    # Let the more specific playlists/live extractors take precedence
    # over the generic channel extractor.
    if YoutubePlaylistsIE.suitable(url) or YoutubeLiveIE.suitable(url):
        return False
    return super(YoutubeChannelIE, cls).suitable(url)
def _build_template_url(self, url, channel_id):
    # `url` is accepted only for interface parity with subclasses
    # (YoutubeUserIE reads it); the channel listing needs just the id.
    videos_url = self._TEMPLATE_URL % channel_id
    return videos_url
2818 def _real_extract(self, url):
2819 channel_id = self._match_id(url)
2821 url = self._build_template_url(url, channel_id)
2823 # Channel by page listing is restricted to 35 pages of 30 items, i.e. 1050 videos total (see #5778)
2824 # Workaround by extracting as a playlist if managed to obtain channel playlist URL
2825 # otherwise fallback on channel by page extraction
2826 channel_page = self._download_webpage(
2827 url + '?view=57', channel_id,
2828 'Downloading channel page', fatal=False)
2829 if channel_page is False:
2830 channel_playlist_id = False
2832 channel_playlist_id = self._html_search_meta(
2833 'channelId', channel_page, 'channel id', default=None)
2834 if not channel_playlist_id:
2835 channel_url = self._html_search_meta(
2836 ('al:ios:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad'),
2837 channel_page, 'channel url', default=None)
2839 channel_playlist_id = self._search_regex(
2840 r'vnd\.youtube://user/([0-9A-Za-z_-]+)',
2841 channel_url, 'channel id', default=None)
2842 if channel_playlist_id and channel_playlist_id.startswith('UC'):
2843 playlist_id = 'UU' + channel_playlist_id[2:]
2844 return self.url_result(
2845 compat_urlparse.urljoin(url, '/playlist?list=%s' % playlist_id), 'YoutubePlaylist')
2847 channel_page = self._download_webpage(url, channel_id, 'Downloading page #1')
2848 autogenerated = re.search(r'''(?x)
2850 channel-header-autogenerated-label|
2851 yt-channel-title-autogenerated
2852 )[^"]*"''', channel_page) is not None
2855 # The videos are contained in a single page
2856 # the ajax pages can't be used, they are empty
2859 video_id, 'Youtube', video_id=video_id,
2860 video_title=video_title)
2861 for video_id, video_title in self.extract_videos_from_page(channel_page)]
2862 return self.playlist_result(entries, channel_id)
2865 next(self._entries(channel_page, channel_id))
2866 except StopIteration:
2867 alert_message = self._html_search_regex(
2868 r'(?s)<div[^>]+class=(["\']).*?\byt-alert-message\b.*?\1[^>]*>(?P<alert>[^<]+)</div>',
2869 channel_page, 'alert', default=None, group='alert')
2871 raise ExtractorError('Youtube said: %s' % alert_message, expected=True)
2873 return self.playlist_result(self._entries(channel_page, channel_id), channel_id)
class YoutubeUserIE(YoutubeChannelIE):
    """Extract all uploads from a YouTube user page (or 'ytuser:' keyword)."""
    IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:(?P<user>user|c)/)?(?!(?:attribution_link|watch|results|shared)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/%s/%s/videos'
    IE_NAME = 'youtube:user'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheLinuxFoundation',
        'playlist_mincount': 320,
        'info_dict': {
            'id': 'UUfX55Sx5hEFjoC3cNs6mCUQ',
            'title': 'Uploads from The Linux Foundation',
            'uploader': 'The Linux Foundation',
            'uploader_id': 'TheLinuxFoundation',
        }
    }, {
        # Only available via https://www.youtube.com/c/12minuteathlete/videos
        # but not https://www.youtube.com/user/12minuteathlete/videos
        'url': 'https://www.youtube.com/c/12minuteathlete/videos',
        'playlist_mincount': 249,
        'info_dict': {
            'id': 'UUVjM-zV6_opMDx7WYxnjZiQ',
            'title': 'Uploads from 12 Minute Athlete',
            'uploader': '12 Minute Athlete',
            'uploader_id': 'the12minuteathlete',
        }
    }, {
        'url': 'ytuser:phihag',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/gametrailers',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/gametrailers',
        'only_matching': True,
    }, {
        # This channel is not available, geo restricted to JP
        'url': 'https://www.youtube.com/user/kananishinoSMEJ/videos',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        # _VALID_URL is deliberately permissive, so defer to any other
        # YouTube extractor that also matches this URL; otherwise this IE
        # would shadow them. (A generator expression is enough here — the
        # original wrapped it in a redundant iter() call.)
        other_yt_ies = (
            klass for name, klass in globals().items()
            if name.startswith('Youtube') and name.endswith('IE') and klass is not cls)
        if any(ie.suitable(url) for ie in other_yt_ies):
            return False
        return super(YoutubeUserIE, cls).suitable(url)

    def _build_template_url(self, url, channel_id):
        # Preserve the original path kind ('user' or 'c'); bare URLs like
        # youtube.com/<name> fall back to the 'user' form.
        mobj = re.match(self._VALID_URL, url)
        return self._TEMPLATE_URL % (mobj.group('user') or 'user', mobj.group('id'))
class YoutubeLiveIE(YoutubeBaseInfoExtractor):
    """Resolve a channel's /live URL to its current live stream (or the channel)."""
    IE_DESC = 'YouTube.com live streams'
    _VALID_URL = r'(?P<base_url>https?://(?:\w+\.)?youtube\.com/(?:(?:user|channel|c)/)?(?P<id>[^/]+))/live'
    IE_NAME = 'youtube:live'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheYoungTurks/live',
        'info_dict': {
            'id': 'a48o2S1cPoo',
            'ext': 'mp4',
            'title': 'The Young Turks - Live Main Show',
            'uploader': 'The Young Turks',
            'uploader_id': 'TheYoungTurks',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
            'upload_date': '20150715',
            'license': 'Standard YouTube License',
            'description': 'md5:438179573adcdff3c97ebb1ee632b891',
            'categories': ['News & Politics'],
            'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/TheYoungTurks/live',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        match = re.match(self._VALID_URL, url)
        display_id = match.group('id')
        channel_url = match.group('base_url')
        # Best effort: if the /live page itself is unreachable, fall back to
        # letting the channel/user extractor deal with the base URL.
        webpage = self._download_webpage(url, display_id, fatal=False)
        if webpage:
            page_type = self._og_search_property(
                'type', webpage, 'page type', default='')
            video_id = self._html_search_meta(
                'videoId', webpage, 'video id', default=None)
            # Only hand off to the video extractor when the page really is a
            # video page carrying a well-formed 11-character video id.
            if page_type.startswith('video') and video_id and re.match(
                    r'^[0-9A-Za-z_-]{11}$', video_id):
                return self.url_result(video_id, YoutubeIE.ie_key())
        return self.url_result(channel_url)
class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
    """List all playlists of a user or channel (the /playlists tab)."""
    IE_DESC = 'YouTube.com user/channel playlists'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/(?:user|channel)/(?P<id>[^/]+)/playlists'
    IE_NAME = 'youtube:playlists'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
        'playlist_mincount': 4,
        'info_dict': {
            'id': 'ThirstForScience',
            'title': 'ThirstForScience',
        },
    }, {
        # with "Load more" button
        'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
        'playlist_mincount': 70,
        'info_dict': {
            'id': 'igorkle1',
            'title': 'Игорь Клейнер',
        },
    }, {
        'url': 'https://www.youtube.com/channel/UCiU1dHvZObB2iP6xkJ__Icw/playlists',
        'playlist_mincount': 17,
        'info_dict': {
            'id': 'UCiU1dHvZObB2iP6xkJ__Icw',
            'title': 'Chem Player',
        },
    }]
class YoutubeSearchBaseInfoExtractor(YoutubePlaylistBaseInfoExtractor):
    """Shared video-link regex for search-result pages."""
    # Matches /watch links in search-result HTML; the title group is optional
    # because not every anchor carries a title attribute.
    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})(?:[^"]*"[^>]+\btitle="(?P<title>[^"]+))?'
class YoutubeSearchIE(SearchInfoExtractor, YoutubeSearchBaseInfoExtractor):
    """Search extractor behind the 'ytsearchN:query' pseudo-URL scheme."""
    IE_DESC = 'YouTube.com searches'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    _EXTRA_QUERY_ARGS = {}

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""

        collected = []
        limit = n

        url_query = {
            'search_query': query.encode('utf-8'),
        }
        url_query.update(self._EXTRA_QUERY_ARGS)
        result_url = 'https://www.youtube.com/results?' + compat_urllib_parse_urlencode(url_query)

        for page_num in itertools.count(1):
            data = self._download_json(
                result_url, video_id='query "%s"' % query,
                note='Downloading page %s' % page_num,
                errnote='Unable to download API page',
                query={'spf': 'navigate'})
            html_content = data[1]['body']['content']

            # YouTube renders an explicit "no results" panel into the page.
            if 'class="search-message' in html_content:
                raise ExtractorError(
                    '[youtube] No video results', expected=True)

            page_videos = list(self._process_page(html_content))
            collected += page_videos
            # Stop when a page yields nothing new or we already have enough.
            if not page_videos or len(collected) > limit:
                break

            next_link = self._html_search_regex(
                r'href="(/results\?[^"]*\bsp=[^"]+)"[^>]*>\s*<span[^>]+class="[^"]*\byt-uix-button-content\b[^"]*"[^>]*>Next',
                html_content, 'next link', default=None)
            if next_link is None:
                break
            result_url = compat_urlparse.urljoin('https://www.youtube.com/', next_link)

        # Trim any overshoot from the last page before building the playlist.
        if len(collected) > n:
            collected = collected[:n]
        return self.playlist_result(collected, query)
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Same as YoutubeSearchIE but orders results by upload date."""
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    IE_DESC = 'YouTube.com searches, newest videos first'
    _SEARCH_KEY = 'ytsearchdate'
    _EXTRA_QUERY_ARGS = {'search_sort': 'video_date_uploaded'}
class YoutubeSearchURLIE(YoutubeSearchBaseInfoExtractor):
    """Extract results from a pasted YouTube search-results URL."""
    IE_DESC = 'YouTube.com search URLs'
    IE_NAME = 'youtube:search_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)'

    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # The decoded query doubles as both display id and playlist title.
        query = compat_urllib_parse_unquote_plus(
            re.match(self._VALID_URL, url).group('query'))
        webpage = self._download_webpage(url, query)
        return self.playlist_result(self._process_page(webpage), playlist_title=query)
class YoutubeShowIE(YoutubePlaylistsBaseInfoExtractor):
    """Treat a /show/<name> page as the collection of its season playlists."""
    IE_DESC = 'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/show/(?P<id>[^?#]*)'
    IE_NAME = 'youtube:show'

    _TESTS = [{
        'url': 'https://www.youtube.com/show/airdisasters',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'airdisasters',
            'title': 'Air Disasters',
        }
    }]

    def _real_extract(self, url):
        # A show is just its /playlists tab; delegate to the playlists base.
        show_id = self._match_id(url)
        return super(YoutubeShowIE, self)._real_extract(
            'https://www.youtube.com/show/%s/playlists' % show_id)
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
    """
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        return 'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        # Feeds are account-specific, so authenticate before extracting.
        self._login()

    def _entries(self, page):
        # The extraction process is the same as for playlists, but the regex
        # for the video ids doesn't contain an index
        seen_ids = []
        more_widget_html = content_html = page
        for page_num in itertools.count(1):
            found_ids = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)

            # 'recommended' feed has infinite 'load more' and each new portion spins
            # the same videos in (sometimes) slightly different order, so we'll check
            # for unicity and break when portion has no new videos
            fresh_ids = list(filter(lambda video_id: video_id not in seen_ids, orderedSet(found_ids)))
            if not fresh_ids:
                break

            seen_ids.extend(fresh_ids)

            for entry in self._ids_to_results(fresh_ids):
                yield entry

            load_more = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
            if not load_more:
                break

            more = self._download_json(
                'https://youtube.com/%s' % load_more.group('more'), self._PLAYLIST_TITLE,
                'Downloading page #%s' % page_num,
                transform_source=uppercase_escape)
            content_html = more['content_html']
            more_widget_html = more['load_more_widget_html']

    def _real_extract(self, url):
        page = self._download_webpage(
            'https://www.youtube.com/feed/%s' % self._FEED_NAME,
            self._PLAYLIST_TITLE)
        return self.playlist_result(
            self._entries(page), playlist_title=self._PLAYLIST_TITLE)
class YoutubeWatchLaterIE(YoutubePlaylistIE):
    """The user's 'Watch Later' list — the special playlist id 'WL'."""
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:feed/watch_later|(?:playlist|watch)\?(?:.+&)?list=WL)|:ytwatchlater'

    _TESTS = [{
        'url': 'https://www.youtube.com/playlist?list=WL',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?v=bCNU9TrbiRk&index=1&list=WL',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # A watch URL with list=WL may really mean "just this video".
        _, video = self._check_download_just_video(url, 'WL')
        if video:
            return video
        _, playlist = self._extract_playlist('WL')
        return playlist
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """The logged-in user's favourites — resolved to its backing playlist."""
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
    _LOGIN_REQUIRED = True

    def _real_extract(self, url):
        # The favourites feed is an ordinary playlist behind a login wall:
        # scrape its id from the page and delegate to the playlist extractor.
        page = self._download_webpage('https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        fav_playlist_id = self._search_regex(r'list=(.+?)["&]', page, 'favourites playlist id')
        return self.url_result(fav_playlist_id, 'YoutubePlaylist')
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """The account's 'recommended' feed."""
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/recommended|:ytrec(?:ommended)?'
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = 'Youtube Recommended videos'
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """The account's subscriptions feed."""
    IE_DESC = 'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = 'Youtube Subscriptions'
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """The account's watch-history feed."""
    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/history|:ythistory'
    _FEED_NAME = 'history'
    _PLAYLIST_TITLE = 'Youtube History'
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch watch URLs whose v= parameter was lost (typically an unquoted &)
    and fail with a helpful message instead of a confusing one."""
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    # NOTE(review): interior alternatives of this verbose regex were partially
    # reconstructed from the test URLs below — confirm against upstream.
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Nothing to extract — the whole point is the actionable error text.
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like  youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply  youtube-dl BaW_jenozKc .',
            expected=True)
3273 class YoutubeTruncatedIDIE(InfoExtractor):
3274 IE_NAME = 'youtube:truncated_id'
3275 IE_DESC = False # Do not list
3276 _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'
3279 'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
3280 'only_matching': True,
3283 def _real_extract(self, url):
3284 video_id = self._match_id(url)
3285 raise ExtractorError(
3286 'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url),