3 from __future__ import unicode_literals
14 from .common import InfoExtractor, SearchInfoExtractor
15 from ..jsinterp import JSInterpreter
16 from ..swfinterp import SWFInterpreter
17 from ..compat import (
22 compat_urllib_parse_unquote,
23 compat_urllib_parse_unquote_plus,
24 compat_urllib_parse_urlencode,
25 compat_urllib_parse_urlparse,
37 get_element_by_attribute,
59 class YoutubeBaseInfoExtractor(InfoExtractor):
60 """Provide base functions for Youtube extractors"""
61 _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
62 _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'
64 _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
65 _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
66 _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'
68 _NETRC_MACHINE = 'youtube'
69 # If True it will raise an error if no login info is provided
70 _LOGIN_REQUIRED = False
72 _PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL|OLAK5uy_)[0-9A-Za-z-_]{10,}'
74 def _set_language(self):
76 '.youtube.com', 'PREF', 'f1=50000000&hl=en',
77 # YouTube sets the expire time to about two months
78 expire_time=time.time() + 2 * 30 * 24 * 3600)
80 def _ids_to_results(self, ids):
82 self.url_result(vid_id, 'Youtube', video_id=vid_id)
87 Attempt to log in to YouTube.
88 True is returned if successful or skipped.
89 False is returned if login failed.
91 If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
93 username, password = self._get_login_info()
94 # No authentication to be performed
96 if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
97 raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
100 login_page = self._download_webpage(
101 self._LOGIN_URL, None,
102 note='Downloading login page',
103 errnote='unable to fetch login page', fatal=False)
104 if login_page is False:
107 login_form = self._hidden_inputs(login_page)
109 def req(url, f_req, note, errnote):
110 data = login_form.copy()
113 'checkConnection': 'youtube',
114 'checkedDomains': 'youtube',
116 'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
117 'f.req': json.dumps(f_req),
118 'flowName': 'GlifWebSignIn',
119 'flowEntry': 'ServiceLogin',
120 # TODO: reverse actual botguard identifier generation algo
121 'bgRequest': '["identifier",""]',
123 return self._download_json(
124 url, None, note=note, errnote=errnote,
125 transform_source=lambda s: re.sub(r'^[^[]*', '', s),
127 data=urlencode_postdata(data), headers={
128 'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
129 'Google-Accounts-XSRF': 1,
133 self._downloader.report_warning(message)
137 None, [], None, 'US', None, None, 2, False, True,
141 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
143 1, [None, None, []], None, None, None, True
148 lookup_results = req(
149 self._LOOKUP_URL, lookup_req,
150 'Looking up account info', 'Unable to look up account info')
152 if lookup_results is False:
155 user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
157 warn('Unable to extract user hash')
162 None, 1, None, [1, None, None, None, [password, None, True]],
164 None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
165 1, [None, None, []], None, None, None, True
168 challenge_results = req(
169 self._CHALLENGE_URL, challenge_req,
170 'Logging in', 'Unable to log in')
172 if challenge_results is False:
175 login_res = try_get(challenge_results, lambda x: x[0][5], list)
177 login_msg = try_get(login_res, lambda x: x[5], compat_str)
179 'Unable to login: %s' % 'Invalid password'
180 if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
183 res = try_get(challenge_results, lambda x: x[0][-1], list)
185 warn('Unable to extract result entry')
188 login_challenge = try_get(res, lambda x: x[0][0], list)
190 challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
191 if challenge_str == 'TWO_STEP_VERIFICATION':
192 # SEND_SUCCESS - TFA code has been successfully sent to phone
193 # QUOTA_EXCEEDED - reached the limit of TFA codes
194 status = try_get(login_challenge, lambda x: x[5], compat_str)
195 if status == 'QUOTA_EXCEEDED':
196 warn('Exceeded the limit of TFA codes, try later')
199 tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
201 warn('Unable to extract TL')
204 tfa_code = self._get_tfa_info('2-step verification code')
208 'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
209 '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
212 tfa_code = remove_start(tfa_code, 'G-')
215 user_hash, None, 2, None,
217 9, None, None, None, None, None, None, None,
218 [None, tfa_code, True, 2]
222 self._TFA_URL.format(tl), tfa_req,
223 'Submitting TFA code', 'Unable to submit TFA code')
225 if tfa_results is False:
228 tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
230 tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
232 'Unable to finish TFA: %s' % 'Invalid TFA code'
233 if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
236 check_cookie_url = try_get(
237 tfa_results, lambda x: x[0][-1][2], compat_str)
240 'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
241 'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
242 'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
244 challenge = CHALLENGES.get(
246 '%s returned error %s.' % (self.IE_NAME, challenge_str))
247 warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
250 check_cookie_url = try_get(res, lambda x: x[2], compat_str)
252 if not check_cookie_url:
253 warn('Unable to extract CheckCookie URL')
256 check_cookie_results = self._download_webpage(
257 check_cookie_url, None, 'Checking cookie', fatal=False)
259 if check_cookie_results is False:
262 if 'https://myaccount.google.com/' not in check_cookie_results:
263 warn('Unable to log in')
268 def _download_webpage_handle(self, *args, **kwargs):
269 query = kwargs.get('query', {}).copy()
270 query['disable_polymer'] = 'true'
271 kwargs['query'] = query
272 return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
273 *args, **compat_kwargs(kwargs))
275 def _real_initialize(self):
276 if self._downloader is None:
279 if not self._login():
283 class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
284 # Extract entries from page with "Load more" button
285 def _entries(self, page, playlist_id):
286 more_widget_html = content_html = page
287 for page_num in itertools.count(1):
288 for entry in self._process_page(content_html):
291 mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
297 while count <= retries:
299 # Downloading page may result in intermittent 5xx HTTP error
300 # that is usually worked around with a retry
301 more = self._download_json(
302 'https://youtube.com/%s' % mobj.group('more'), playlist_id,
303 'Downloading page #%s%s'
304 % (page_num, ' (retry #%d)' % count if count else ''),
305 transform_source=uppercase_escape)
307 except ExtractorError as e:
308 if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503):
314 content_html = more['content_html']
315 if not content_html.strip():
316 # Some webpages show a "Load more" button but they don't
319 more_widget_html = more['load_more_widget_html']
322 class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
323 def _process_page(self, content):
324 for video_id, video_title in self.extract_videos_from_page(content):
325 yield self.url_result(video_id, 'Youtube', video_id, video_title)
327 def extract_videos_from_page_impl(self, video_re, page, ids_in_page, titles_in_page):
328 for mobj in re.finditer(video_re, page):
329 # The link with index 0 is not the first video of the playlist (not sure if still actual)
330 if 'index' in mobj.groupdict() and mobj.group('id') == '0':
332 video_id = mobj.group('id')
333 video_title = unescapeHTML(
334 mobj.group('title')) if 'title' in mobj.groupdict() else None
336 video_title = video_title.strip()
337 if video_title == '► Play all':
340 idx = ids_in_page.index(video_id)
341 if video_title and not titles_in_page[idx]:
342 titles_in_page[idx] = video_title
344 ids_in_page.append(video_id)
345 titles_in_page.append(video_title)
347 def extract_videos_from_page(self, page):
350 self.extract_videos_from_page_impl(
351 self._VIDEO_RE, page, ids_in_page, titles_in_page)
352 return zip(ids_in_page, titles_in_page)
355 class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
356 def _process_page(self, content):
357 for playlist_id in orderedSet(re.findall(
358 r'<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*><a[^>]+href="/?playlist\?list=([0-9A-Za-z-_]{10,})"',
360 yield self.url_result(
361 'https://www.youtube.com/playlist?list=%s' % playlist_id, 'YoutubePlaylist')
363 def _real_extract(self, url):
364 playlist_id = self._match_id(url)
365 webpage = self._download_webpage(url, playlist_id)
366 title = self._og_search_title(webpage, fatal=False)
367 return self.playlist_result(self._entries(webpage, playlist_id), playlist_id, title)
370 class YoutubeIE(YoutubeBaseInfoExtractor):
371 IE_DESC = 'YouTube.com'
372 _VALID_URL = r"""(?x)^
374 (?:https?://|//) # http(s):// or protocol-independent URL
375 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|
376 (?:www\.)?deturl\.com/www\.youtube\.com/|
377 (?:www\.)?pwnyoutube\.com/|
378 (?:www\.)?hooktube\.com/|
379 (?:www\.)?yourepeat\.com/|
380 tube\.majestyc\.net/|
381 # Invidious instances taken from https://github.com/omarroth/invidious/wiki/Invidious-Instances
382 (?:(?:www|dev)\.)?invidio\.us/|
383 (?:(?:www|no)\.)?invidiou\.sh/|
384 (?:(?:www|fi|de)\.)?invidious\.snopyta\.org/|
385 (?:www\.)?invidious\.kabi\.tk/|
386 (?:www\.)?invidious\.enkirton\.net/|
387 (?:www\.)?invidious\.13ad\.de/|
388 (?:www\.)?invidious\.mastodon\.host/|
389 (?:www\.)?invidious\.nixnet\.xyz/|
390 (?:www\.)?invidious\.drycat\.fr/|
391 (?:www\.)?tube\.poal\.co/|
392 (?:www\.)?vid\.wxzm\.sx/|
393 (?:www\.)?yt\.elukerio\.org/|
394 (?:www\.)?kgg2m7yk5aybusll\.onion/|
395 (?:www\.)?qklhadlycap4cnod\.onion/|
396 (?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion/|
397 (?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion/|
398 (?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion/|
399 (?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion/|
400 youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
401 (?:.*?\#/)? # handle anchor (#/) redirect urls
402 (?: # the various things that can precede the ID:
403 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
404 |(?: # or the v= param in all its forms
405 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
406 (?:\?|\#!?) # the params delimiter ? or # or #!
407 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&v=V36LpHqtcDY)
412 youtu\.be| # just youtu.be/xxxx
413 vid\.plus| # or vid.plus/xxxx
414 zwearz\.com/watch| # or zwearz.com/watch/xxxx
416 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
418 )? # all until now is optional -> you can pass the naked ID
419 ([0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
422 %(playlist_id)s| # combined list/video URLs are handled by the playlist IE
423 WL # WL are handled by the watch later IE
426 (?(1).+)? # if we found the ID, everything can follow
427 $""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
428 _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
430 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
431 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
432 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
433 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
434 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
435 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
436 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
437 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
438 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
439 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
440 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
441 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
442 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
443 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
444 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
445 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
446 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
447 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
451 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
452 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
453 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
454 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
455 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
456 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
457 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
459 # Apple HTTP Live Streaming
460 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
461 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
462 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
463 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
464 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
465 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
466 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
467 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
470 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
471 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
472 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
473 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
474 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
475 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
476 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
477 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
478 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
479 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
480 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
481 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
484 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
485 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
486 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
487 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
488 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
489 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
490 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
493 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
494 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
495 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
496 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
497 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
498 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
499 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
500 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
501 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
502 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
503 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
504 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
505 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
506 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
507 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
508 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
509 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
510 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
511 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
512 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
513 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
514 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
517 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
518 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
520 # Dash webm audio with opus inside
521 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
522 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
523 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
526 '_rtmp': {'protocol': 'rtmp'},
528 # av01 video only formats sometimes served with "unknown" codecs
529 '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
530 '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
531 '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
532 '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
534 _SUBTITLE_FORMATS = ('srv1', 'srv2', 'srv3', 'ttml', 'vtt')
541 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
545 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
546 'uploader': 'Philipp Hagemeister',
547 'uploader_id': 'phihag',
548 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
549 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
550 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
551 'upload_date': '20121002',
552 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
553 'categories': ['Science & Technology'],
554 'tags': ['youtube-dl'],
558 'dislike_count': int,
564 'url': 'https://www.youtube.com/watch?v=UxxajLWwzqY',
565 'note': 'Test generic use_cipher_signature video (#897)',
569 'upload_date': '20120506',
570 'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
571 'alt_title': 'I Love It (feat. Charli XCX)',
572 'description': 'md5:f3ceb5ef83a08d95b9d146f973157cc8',
573 'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
574 'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
575 'iconic ep', 'iconic', 'love', 'it'],
577 'uploader': 'Icona Pop',
578 'uploader_id': 'IconaPop',
579 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IconaPop',
580 'creator': 'Icona Pop',
581 'track': 'I Love It (feat. Charli XCX)',
582 'artist': 'Icona Pop',
586 'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ',
587 'note': 'Test VEVO video with age protection (#956)',
591 'upload_date': '20130703',
592 'title': 'Justin Timberlake - Tunnel Vision (Official Music Video) (Explicit)',
593 'alt_title': 'Tunnel Vision',
594 'description': 'md5:07dab3356cde4199048e4c7cd93471e1',
596 'uploader': 'justintimberlakeVEVO',
597 'uploader_id': 'justintimberlakeVEVO',
598 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/justintimberlakeVEVO',
599 'creator': 'Justin Timberlake',
600 'track': 'Tunnel Vision',
601 'artist': 'Justin Timberlake',
606 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
607 'note': 'Embed-only video (#1746)',
611 'upload_date': '20120608',
612 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
613 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
614 'uploader': 'SET India',
615 'uploader_id': 'setindia',
616 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
621 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=UxxajLWwzqY',
622 'note': 'Use the first video ID in the URL',
626 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
627 'uploader': 'Philipp Hagemeister',
628 'uploader_id': 'phihag',
629 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
630 'upload_date': '20121002',
631 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
632 'categories': ['Science & Technology'],
633 'tags': ['youtube-dl'],
637 'dislike_count': int,
640 'skip_download': True,
644 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
645 'note': '256k DASH audio (format 141) via DASH manifest',
649 'upload_date': '20121002',
650 'uploader_id': '8KVIDEO',
651 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
653 'uploader': '8KVIDEO',
654 'title': 'UHDTV TEST 8K VIDEO.mp4'
657 'youtube_include_dash_manifest': True,
660 'skip': 'format 141 not served anymore',
662 # DASH manifest with encrypted signature
664 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
668 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
669 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
671 'uploader': 'AfrojackVEVO',
672 'uploader_id': 'AfrojackVEVO',
673 'upload_date': '20131011',
676 'youtube_include_dash_manifest': True,
677 'format': '141/bestaudio[ext=m4a]',
680 # JS player signature function name containing $
682 'url': 'https://www.youtube.com/watch?v=nfWlot6h_JM',
686 'title': 'Taylor Swift - Shake It Off',
687 'description': 'md5:bec2185232c05479482cb5a9b82719bf',
689 'uploader': 'TaylorSwiftVEVO',
690 'uploader_id': 'TaylorSwiftVEVO',
691 'upload_date': '20140818',
692 'creator': 'Taylor Swift',
695 'youtube_include_dash_manifest': True,
696 'format': '141/bestaudio[ext=m4a]',
701 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
706 'upload_date': '20100909',
707 'uploader': 'Amazing Atheist',
708 'uploader_id': 'TheAmazingAtheist',
709 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
710 'title': 'Burning Everyone\'s Koran',
711 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
714 # Normal age-gate video (No vevo, embed allowed)
716 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
720 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
721 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
723 'uploader': 'The Witcher',
724 'uploader_id': 'WitcherGame',
725 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
726 'upload_date': '20140605',
730 # Age-gate video with encrypted signature
732 'url': 'https://www.youtube.com/watch?v=6kLq3WMV1nU',
736 'title': 'Dedication To My Ex (Miss That) (Lyric Video)',
737 'description': 'md5:33765bb339e1b47e7e72b5490139bb41',
739 'uploader': 'LloydVEVO',
740 'uploader_id': 'LloydVEVO',
741 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/LloydVEVO',
742 'upload_date': '20110629',
746 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
747 # YouTube Red ad is not captured for creator
749 'url': '__2ABJjxzNo',
754 'upload_date': '20100430',
755 'uploader_id': 'deadmau5',
756 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
757 'creator': 'deadmau5',
758 'description': 'md5:12c56784b8032162bb936a5f76d55360',
759 'uploader': 'deadmau5',
760 'title': 'Deadmau5 - Some Chords (HD)',
761 'alt_title': 'Some Chords',
763 'expected_warnings': [
764 'DASH manifest missing',
767 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
769 'url': 'lqQg6PlCWgI',
774 'upload_date': '20150827',
775 'uploader_id': 'olympic',
776 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
777 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
778 'uploader': 'Olympic',
779 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
782 'skip_download': 'requires avconv',
787 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
791 'stretched_ratio': 16 / 9.,
793 'upload_date': '20110310',
794 'uploader_id': 'AllenMeow',
795 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
796 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
798 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
801 # url_encoded_fmt_stream_map is empty string
803 'url': 'qEJwOuvDf7I',
807 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
809 'upload_date': '20150404',
810 'uploader_id': 'spbelect',
811 'uploader': 'Наблюдатели Петербурга',
814 'skip_download': 'requires avconv',
816 'skip': 'This live event has ended.',
818 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
820 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
824 'title': 'md5:7b81415841e02ecd4313668cde88737a',
825 'description': 'md5:116377fd2963b81ec4ce64b542173306',
827 'upload_date': '20150625',
828 'uploader_id': 'dorappi2000',
829 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
830 'uploader': 'dorappi2000',
831 'formats': 'mincount:31',
833 'skip': 'not actual anymore',
835 # DASH manifest with segment_list
837 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
838 'md5': '8ce563a1d667b599d21064e982ab9e31',
842 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
843 'uploader': 'Airtek',
844 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
845 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
846 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
849 'youtube_include_dash_manifest': True,
850 'format': '135', # bestvideo
852 'skip': 'This live event has ended.',
855 # Multifeed videos (multiple cameras), URL is for Main Camera
856 'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
859 'title': 'teamPGP: Rocket League Noob Stream',
860 'description': 'md5:dc7872fb300e143831327f1bae3af010',
866 'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
867 'description': 'md5:dc7872fb300e143831327f1bae3af010',
869 'upload_date': '20150721',
870 'uploader': 'Beer Games Beer',
871 'uploader_id': 'beergamesbeer',
872 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
873 'license': 'Standard YouTube License',
879 'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
880 'description': 'md5:dc7872fb300e143831327f1bae3af010',
882 'upload_date': '20150721',
883 'uploader': 'Beer Games Beer',
884 'uploader_id': 'beergamesbeer',
885 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
886 'license': 'Standard YouTube License',
892 'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
893 'description': 'md5:dc7872fb300e143831327f1bae3af010',
895 'upload_date': '20150721',
896 'uploader': 'Beer Games Beer',
897 'uploader_id': 'beergamesbeer',
898 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
899 'license': 'Standard YouTube License',
905 'title': 'teamPGP: Rocket League Noob Stream (zim)',
906 'description': 'md5:dc7872fb300e143831327f1bae3af010',
908 'upload_date': '20150721',
909 'uploader': 'Beer Games Beer',
910 'uploader_id': 'beergamesbeer',
911 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
912 'license': 'Standard YouTube License',
916 'skip_download': True,
918 'skip': 'This video is not available.',
921 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
922 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
925 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
928 'skip': 'Not multifeed anymore',
931 'url': 'https://vid.plus/FlRa-iH7PGw',
932 'only_matching': True,
935 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
936 'only_matching': True,
939 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
940 # Also tests cut-off URL expansion in video description (see
941 # https://github.com/ytdl-org/youtube-dl/issues/1892,
942 # https://github.com/ytdl-org/youtube-dl/issues/8164)
943 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
947 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
948 'alt_title': 'Dark Walk - Position Music',
949 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
951 'upload_date': '20151119',
952 'uploader_id': 'IronSoulElf',
953 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
954 'uploader': 'IronSoulElf',
955 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
956 'track': 'Dark Walk - Position Music',
957 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
958 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
961 'skip_download': True,
965 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
966 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
967 'only_matching': True,
970 # Video with yt:stretch=17:0
971 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
975 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
976 'description': 'md5:ee18a25c350637c8faff806845bddee9',
977 'upload_date': '20151107',
978 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
979 'uploader': 'CH GAMER DROID',
982 'skip_download': True,
984 'skip': 'This video does not exist.',
987 # Video licensed under Creative Commons
988 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
992 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
993 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
995 'upload_date': '20150127',
996 'uploader_id': 'BerkmanCenter',
997 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
998 'uploader': 'The Berkman Klein Center for Internet & Society',
999 'license': 'Creative Commons Attribution license (reuse allowed)',
1002 'skip_download': True,
1006 # Channel-like uploader_url
1007 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1009 'id': 'eQcmzGIKrzg',
1011 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
1012 'description': 'md5:dda0d780d5a6e120758d1711d062a867',
1014 'upload_date': '20151119',
1015 'uploader': 'Bernie Sanders',
1016 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1017 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
1018 'license': 'Creative Commons Attribution license (reuse allowed)',
1021 'skip_download': True,
1025 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;v=V36LpHqtcDY',
1026 'only_matching': True,
1029 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
1030 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1031 'only_matching': True,
1034 # Rental video preview
1035 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1037 'id': 'uGpuVWrhIzE',
1039 'title': 'Piku - Trailer',
1040 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1041 'upload_date': '20150811',
1042 'uploader': 'FlixMatrix',
1043 'uploader_id': 'FlixMatrixKaravan',
1044 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
1045 'license': 'Standard YouTube License',
1048 'skip_download': True,
1050 'skip': 'This video is not available.',
1053 # YouTube Red video with episode data
1054 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1056 'id': 'iqKdEhx-dD4',
1058 'title': 'Isolation - Mind Field (Ep 1)',
1059 'description': 'md5:46a29be4ceffa65b92d277b93f463c0f',
1061 'upload_date': '20170118',
1062 'uploader': 'Vsauce',
1063 'uploader_id': 'Vsauce',
1064 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
1065 'series': 'Mind Field',
1067 'episode_number': 1,
1070 'skip_download': True,
1072 'expected_warnings': [
1073 'Skipping DASH manifest',
1077 # The following content has been identified by the YouTube community
1078 # as inappropriate or offensive to some audiences.
1079 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1081 'id': '6SJNVb0GnPI',
1083 'title': 'Race Differences in Intelligence',
1084 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1086 'upload_date': '20140124',
1087 'uploader': 'New Century Foundation',
1088 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1089 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1092 'skip_download': True,
1097 'url': '1t24XAntNCY',
1098 'only_matching': True,
1101 # geo restricted to JP
1102 'url': 'sJL6WA-aGkQ',
1103 'only_matching': True,
1106 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
1107 'only_matching': True,
1110 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1111 'only_matching': True,
1115 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1116 'only_matching': True,
1119 # Video with unsupported adaptive stream type formats
1120 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1122 'id': 'Z4Vy8R84T1U',
1124 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1125 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1127 'upload_date': '20130923',
1128 'uploader': 'Amelia Putri Harwita',
1129 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1130 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1131 'formats': 'maxcount:10',
1134 'skip_download': True,
1135 'youtube_include_dash_manifest': False,
1139 # Youtube Music Auto-generated description
1140 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1142 'id': 'MgNrAu2pzNs',
1144 'title': 'Voyeur Girl',
1145 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1146 'upload_date': '20190312',
1147 'uploader': 'Various Artists - Topic',
1148 'uploader_id': 'UCVWKBi1ELZn0QX2CBLSkiyw',
1149 'artist': 'Stephen',
1150 'track': 'Voyeur Girl',
1151 'album': 'it\'s too much love to know my dear',
1152 'release_date': '20190313',
1153 'release_year': 2019,
1156 'skip_download': True,
1160 # Youtube Music Auto-generated description
1161 # Retrieve 'artist' field from 'Artist:' in video description
1162 # when it is present on youtube music video
1163 'url': 'https://www.youtube.com/watch?v=k0jLE7tTwjY',
1165 'id': 'k0jLE7tTwjY',
1167 'title': 'Latch Feat. Sam Smith',
1168 'description': 'md5:3cb1e8101a7c85fcba9b4fb41b951335',
1169 'upload_date': '20150110',
1170 'uploader': 'Various Artists - Topic',
1171 'uploader_id': 'UCNkEcmYdjrH4RqtNgh7BZ9w',
1172 'artist': 'Disclosure',
1173 'track': 'Latch Feat. Sam Smith',
1174 'album': 'Latch Featuring Sam Smith',
1175 'release_date': '20121008',
1176 'release_year': 2012,
1179 'skip_download': True,
1183 # Youtube Music Auto-generated description
1184 # handle multiple artists on youtube music video
1185 'url': 'https://www.youtube.com/watch?v=74qn0eJSjpA',
1187 'id': '74qn0eJSjpA',
1189 'title': 'Eastside',
1190 'description': 'md5:290516bb73dcbfab0dcc4efe6c3de5f2',
1191 'upload_date': '20180710',
1192 'uploader': 'Benny Blanco - Topic',
1193 'uploader_id': 'UCzqz_ksRu_WkIzmivMdIS7A',
1194 'artist': 'benny blanco, Halsey, Khalid',
1195 'track': 'Eastside',
1196 'album': 'Eastside',
1197 'release_date': '20180713',
1198 'release_year': 2018,
1201 'skip_download': True,
1205 # Youtube Music Auto-generated description
1206 # handle youtube music video with release_year and no release_date
1207 'url': 'https://www.youtube.com/watch?v=-hcAI0g-f5M',
1209 'id': '-hcAI0g-f5M',
1211 'title': 'Put It On Me',
1212 'description': 'md5:93c55acc682ae7b0c668f2e34e1c069e',
1213 'upload_date': '20180426',
1214 'uploader': 'Matt Maeson - Topic',
1215 'uploader_id': 'UCnEkIGqtGcQMLk73Kp-Q5LQ',
1216 'artist': 'Matt Maeson',
1217 'track': 'Put It On Me',
1218 'album': 'The Hearse',
1219 'release_date': None,
1220 'release_year': 2018,
1223 'skip_download': True,
1228 def __init__(self, *args, **kwargs):
1229 super(YoutubeIE, self).__init__(*args, **kwargs)
1230 self._player_cache = {}
1232 def report_video_info_webpage_download(self, video_id):
1233 """Report attempt to download video info webpage."""
1234 self.to_screen('%s: Downloading video info webpage' % video_id)
1236 def report_information_extraction(self, video_id):
1237 """Report attempt to extract video information."""
1238 self.to_screen('%s: Extracting video information' % video_id)
1240 def report_unavailable_format(self, video_id, format):
1241 """Report extracted video URL."""
1242 self.to_screen('%s: Format %s not available' % (video_id, format))
1244 def report_rtmp_download(self):
1245 """Indicate the download will use the RTMP protocol."""
1246 self.to_screen('RTMP download detected')
1248 def _signature_cache_id(self, example_sig):
1249 """ Return a string representation of a signature """
1250 return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
    def _extract_signature_function(self, video_id, player_url, example_sig):
        # Build (or load from the on-disk cache) the callable that unscrambles
        # the 's' signature field for the player at player_url.
        # NOTE(review): some statements appear elided in this excerpt (e.g. the
        # re.match(...) call producing id_m) — verify against the full source.
            r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|(?:/[a-z]{2,3}_[A-Z]{2})?/base)?\.(?P<ext>[a-z]+)$',
            raise ExtractorError('Cannot identify player %r' % player_url)
        # 'ext' distinguishes JS players from legacy SWF players.
        player_type = id_m.group('ext')
        player_id = id_m.group('id')
        # Read from filesystem cache
        func_id = '%s_%s_%s' % (
            player_type, player_id, self._signature_cache_id(example_sig))
        # func_id doubles as a cache file name, so it must not contain a path.
        assert os.path.basename(func_id) == func_id
        cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
        if cache_spec is not None:
            # Cached spec is a list of character indices: applying it permutes
            # the scrambled signature into the working one.
            return lambda s: ''.join(s[i] for i in cache_spec)
            'Downloading player %s' % player_url
            if self._downloader.params.get('verbose') else
            'Downloading %s player %s' % (player_type, player_id)
        if player_type == 'js':
            code = self._download_webpage(
                player_url, video_id,
                errnote='Download of %s failed' % player_url)
            res = self._parse_sig_js(code)
        elif player_type == 'swf':
            urlh = self._request_webpage(
                player_url, video_id,
                errnote='Download of %s failed' % player_url)
            # NOTE(review): 'code' is presumably read from urlh in an elided
            # statement here — confirm against the full source.
            res = self._parse_sig_swf(code)
            assert False, 'Invalid player type %r' % player_type
        # Probe the extracted function with a string of distinct characters to
        # derive a compact index-permutation spec, then persist it.
        test_string = ''.join(map(compat_chr, range(len(example_sig))))
        cache_res = res(test_string)
        cache_spec = [ord(c) for c in cache_res]
        self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
    def _print_sig_code(self, func, example_sig):
        """Print Python-like pseudo code that reproduces the signature
        decryption performed by *func* (debugging aid, --youtube-print-sig-code).
        """
        def gen_sig_code(idxs):
            # Compress runs of indices into slice expressions where possible.
            def _genslice(start, end, step):
                starts = '' if start == 0 else str(start)
                # end is adjusted by one step so the slice includes it.
                ends = (':%d' % (end + step)) if end + step >= 0 else ':'
                steps = '' if step == 1 else (':%d' % step)
                return 's[%s%s%s]' % (starts, ends, steps)
            # NOTE(review): the initialization of 'step' appears elided in this
            # excerpt — verify against the full source.
            # Quelch pyflakes warnings - start will be set when step is set
            start = '(Never used)'
            for i, prev in zip(idxs[1:], idxs[:-1]):
                if step is not None:
                    if i - prev == step:
                        yield _genslice(start, prev, step)
                    if i - prev in [-1, 1]:
                        yield 's[%d]' % prev
                yield _genslice(start, i, step)
        # Apply func to a probe string to learn which input position feeds
        # each output position.
        test_string = ''.join(map(compat_chr, range(len(example_sig))))
        cache_res = func(test_string)
        cache_spec = [ord(c) for c in cache_res]
        expr_code = ' + '.join(gen_sig_code(cache_spec))
        signature_id_tuple = '(%s)' % (
            ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
        code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
                '    return %s\n') % (signature_id_tuple, expr_code)
        self.to_screen('Extracted signature function:\n' + code)
    def _parse_sig_js(self, jscode):
        """Locate the signature-decryption function in the player JS source and
        return a wrapper that applies it to a single signature string.
        """
        # Try the most specific name patterns first; the later ones are
        # fallbacks kept for older player versions.
        funcname = self._search_regex(
            (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
             r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
             r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
            jscode, 'Initial JS player signature function name', group='sig')
        jsi = JSInterpreter(jscode)
        initial_function = jsi.extract_function(funcname)
        # JSInterpreter functions take their arguments as a list.
        return lambda s: initial_function([s])
1357 def _parse_sig_swf(self, file_contents):
1358 swfi = SWFInterpreter(file_contents)
1359 TARGET_CLASSNAME = 'SignatureDecipher'
1360 searched_class = swfi.extract_class(TARGET_CLASSNAME)
1361 initial_function = swfi.extract_function(searched_class, 'decipher')
1362 return lambda s: initial_function([s])
    def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
        """Turn the encrypted s field into a working signature"""
        if player_url is None:
            raise ExtractorError('Cannot decrypt signature without player_url')
        # Normalize scheme-relative and site-relative player URLs to absolute.
        if player_url.startswith('//'):
            player_url = 'https:' + player_url
        elif not re.match(r'https?://', player_url):
            player_url = compat_urlparse.urljoin(
                'https://www.youtube.com', player_url)
            # Decryption functions are cached per (player URL, signature shape)
            # so each player is downloaded and parsed at most once per run.
            player_id = (player_url, self._signature_cache_id(s))
            if player_id not in self._player_cache:
                func = self._extract_signature_function(
                    video_id, player_url, s
                )
                self._player_cache[player_id] = func
            func = self._player_cache[player_id]
            if self._downloader.params.get('youtube_print_sig_code'):
                self._print_sig_code(func, s)
        except Exception as e:
            # Include the traceback in the error so users can report it.
            tb = traceback.format_exc()
            raise ExtractorError(
                'Signature extraction failed: ' + tb, cause=e)
    def _get_subtitles(self, video_id, webpage):
        """Fetch the list of manually-created subtitle tracks for *video_id*,
        as a dict mapping language code to a list of subtitle format dicts.
        """
            subs_doc = self._download_xml(
                'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
                video_id, note=False)
        except ExtractorError as err:
            self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err))
        for track in subs_doc.findall('track'):
            lang = track.attrib['lang_code']
            if lang in sub_lang_list:
            # Offer every supported subtitle format for this language.
            for ext in self._SUBTITLE_FORMATS:
                params = compat_urllib_parse_urlencode({
                    'name': track.attrib['name'].encode('utf-8'),
                sub_formats.append({
                    'url': 'https://www.youtube.com/api/timedtext?' + params,
            sub_lang_list[lang] = sub_formats
        if not sub_lang_list:
            self._downloader.report_warning('video doesn\'t have subtitles')
        return sub_lang_list
    def _get_ytplayer_config(self, video_id, webpage):
        """Locate the ytplayer.config JSON blob in *webpage* and parse it
        (non-fatally, so a malformed blob yields None from _parse_json)."""
            # User data may contain arbitrary character sequences that may affect
            # JSON extraction with regex, e.g. when '};' is contained the second
            # regex won't capture the whole JSON. Yet working around by trying more
            # concrete regex first keeping in mind proper quoted string handling
            # to be implemented in future that will replace this workaround (see
            # https://github.com/ytdl-org/youtube-dl/issues/7468,
            # https://github.com/ytdl-org/youtube-dl/pull/7599)
            r';ytplayer\.config\s*=\s*({.+?});ytplayer',
            r';ytplayer\.config\s*=\s*({.+?});',
        config = self._search_regex(
            patterns, webpage, 'ytplayer.config', default=None)
        return self._parse_json(
            uppercase_escape(config), video_id, fatal=False)
    def _get_automatic_captions(self, video_id, webpage):
        """We need the webpage for getting the captions url, pass it as an
           argument to speed up the process."""
        self.to_screen('%s: Looking for automatic captions' % video_id)
        player_config = self._get_ytplayer_config(video_id, webpage)
        err_msg = 'Couldn\'t find automatic captions for %s' % video_id
        if not player_config:
            self._downloader.report_warning(err_msg)
            # Legacy path: 'ttsurl' in the player args points at the timedtext
            # service listing the available caption tracks.
            args = player_config['args']
            caption_url = args.get('ttsurl')
                timestamp = args['timestamp']
                # We get the available subtitles
                list_params = compat_urllib_parse_urlencode({
                list_url = caption_url + '&' + list_params
                caption_list = self._download_xml(list_url, video_id)
                original_lang_node = caption_list.find('track')
                if original_lang_node is None:
                    self._downloader.report_warning('Video doesn\'t have automatic captions')
                original_lang = original_lang_node.attrib['lang_code']
                caption_kind = original_lang_node.attrib.get('kind', '')
                # Build one subtitle entry per translation target language and
                # per supported subtitle format.
                for lang_node in caption_list.findall('target'):
                    sub_lang = lang_node.attrib['lang_code']
                    for ext in self._SUBTITLE_FORMATS:
                        params = compat_urllib_parse_urlencode({
                            'lang': original_lang,
                            'kind': caption_kind,
                        sub_formats.append({
                            'url': caption_url + '&' + params,
                    sub_lang_list[sub_lang] = sub_formats
                return sub_lang_list
            def make_captions(sub_url, sub_langs):
                # Rewrite sub_url's query string per target language/format.
                parsed_sub_url = compat_urllib_parse_urlparse(sub_url)
                caption_qs = compat_parse_qs(parsed_sub_url.query)
                for sub_lang in sub_langs:
                    for ext in self._SUBTITLE_FORMATS:
                            'tlang': [sub_lang],
                        sub_url = compat_urlparse.urlunparse(parsed_sub_url._replace(
                            query=compat_urllib_parse_urlencode(caption_qs, True)))
                        sub_formats.append({
                    captions[sub_lang] = sub_formats
            # New captions format as of 22.06.2017
            player_response = args.get('player_response')
            if player_response and isinstance(player_response, compat_str):
                player_response = self._parse_json(
                    player_response, video_id, fatal=False)
                    renderer = player_response['captions']['playerCaptionsTracklistRenderer']
                    base_url = renderer['captionTracks'][0]['baseUrl']
                    for lang in renderer['translationLanguages']:
                        lang_code = lang.get('languageCode')
                            sub_lang_list.append(lang_code)
                    return make_captions(base_url, sub_lang_list)
            # Some videos don't provide ttsurl but rather caption_tracks and
            # caption_translation_languages (e.g. 20LmZk1hakA)
            # Does not used anymore as of 22.06.2017
            caption_tracks = args['caption_tracks']
            caption_translation_languages = args['caption_translation_languages']
            caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
            for lang in caption_translation_languages.split(','):
                lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang))
                sub_lang = lang_qs.get('lc', [None])[0]
                    sub_lang_list.append(sub_lang)
            return make_captions(caption_url, sub_lang_list)
        # An extractor error can be raise by the download process if there are
        # no automatic captions but there are subtitles
        except (KeyError, IndexError, ExtractorError):
            self._downloader.report_warning(err_msg)
    def _mark_watched(self, video_id, video_info, player_response):
        """Ping YouTube's playback-tracking URL so the video shows up as
        watched in the user's history (best effort, non-fatal)."""
        # Prefer the URL from player_response, falling back to the legacy
        # video_info field.
        playback_url = url_or_none(try_get(
            lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']) or try_get(
            video_info, lambda x: x['videostats_playback_base_url'][0]))
        if not playback_url:
        parsed_playback_url = compat_urlparse.urlparse(playback_url)
        qs = compat_urlparse.parse_qs(parsed_playback_url.query)
        # cpn generation algorithm is reverse engineered from base.js.
        # In fact it works even with dummy cpn.
        CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
        cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
        # Re-assemble the tracking URL with the updated query string.
        playback_url = compat_urlparse.urlunparse(
            parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
        self._download_webpage(
            playback_url, video_id, 'Marking watched',
            'Unable to mark watched', fatal=False)
    def _extract_urls(webpage):
        """Collect YouTube video URLs/ids embedded in *webpage* HTML."""
        # Embedded YouTube player
            unescapeHTML(mobj.group('url'))
            for mobj in re.finditer(r'''(?x)
                (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
                (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        # lazyYT YouTube embed
        entries.extend(list(map(
            re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))
        # Wordpress "YouTube Video Importer" plugin
        matches = re.findall(r'''(?x)<div[^>]+
            class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
            data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
        # The final capture group of the pattern above is the video id.
        entries.extend(m[-1] for m in matches)
1602 def _extract_url(webpage):
1603 urls = YoutubeIE._extract_urls(webpage)
1604 return urls[0] if urls else None
    def extract_id(cls, url):
        """Match *url* against cls._VALID_URL and return the video id."""
        mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
            raise ExtractorError('Invalid URL: %s' % url)
        # The video id is captured by the second group of _VALID_URL.
        video_id = mobj.group(2)
    def _extract_chapters(description, duration):
        """Scan a video description for chapter markers (seekTo anchors with
        mm:ss / hh:mm:ss timestamps) and build chapter dicts carrying
        start_time, end_time and title."""
        chapter_lines = re.findall(
            r'(?:^|<br\s*/>)([^<]*<a[^>]+onclick=["\']yt\.www\.watch\.player\.seekTo[^>]+>(\d{1,2}:\d{1,2}(?::\d{1,2})?)</a>[^>]*)(?=$|<br\s*/>)',
        if not chapter_lines:
        for next_num, (chapter_line, time_point) in enumerate(
                chapter_lines, start=1):
            start_time = parse_duration(time_point)
            if start_time is None:
            # A chapter starting past the video duration cannot be valid.
            if start_time > duration:
            # The last chapter ends at the video duration; every other chapter
            # ends where the next one starts.
            end_time = (duration if next_num == len(chapter_lines)
                        else parse_duration(chapter_lines[next_num][1]))
            if end_time is None:
            if end_time > duration:
            if start_time > end_time:
            # Strip the seekTo anchor, surrounding dashes and extra whitespace
            # to obtain the chapter title.
            chapter_title = re.sub(
                r'<a[^>]+>[^<]+</a>', '', chapter_line).strip(' \t-')
            chapter_title = re.sub(r'\s+', ' ', chapter_title)
                'start_time': start_time,
                'end_time': end_time,
                'title': chapter_title,
1649 def _real_extract(self, url):
1650 url, smuggled_data = unsmuggle_url(url, {})
1653 'http' if self._downloader.params.get('prefer_insecure', False)
1658 parsed_url = compat_urllib_parse_urlparse(url)
1659 for component in [parsed_url.fragment, parsed_url.query]:
1660 query = compat_parse_qs(component)
1661 if start_time is None and 't' in query:
1662 start_time = parse_duration(query['t'][0])
1663 if start_time is None and 'start' in query:
1664 start_time = parse_duration(query['start'][0])
1665 if end_time is None and 'end' in query:
1666 end_time = parse_duration(query['end'][0])
1668 # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1669 mobj = re.search(self._NEXT_URL_RE, url)
1671 url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
1672 video_id = self.extract_id(url)
1675 url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
1676 video_webpage = self._download_webpage(url, video_id)
1678 # Attempt to extract SWF player URL
1679 mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
1680 if mobj is not None:
1681 player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1687 def add_dash_mpd(video_info):
1688 dash_mpd = video_info.get('dashmpd')
1689 if dash_mpd and dash_mpd[0] not in dash_mpds:
1690 dash_mpds.append(dash_mpd[0])
1692 def add_dash_mpd_pr(pl_response):
1693 dash_mpd = url_or_none(try_get(
1694 pl_response, lambda x: x['streamingData']['dashManifestUrl'],
1696 if dash_mpd and dash_mpd not in dash_mpds:
1697 dash_mpds.append(dash_mpd)
1702 def extract_view_count(v_info):
1703 return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
1705 def extract_token(v_info):
1706 return dict_get(v_info, ('account_playback_token', 'accountPlaybackToken', 'token'))
1708 def extract_player_response(player_response, video_id):
1709 pl_response = str_or_none(player_response)
1712 pl_response = self._parse_json(pl_response, video_id, fatal=False)
1713 if isinstance(pl_response, dict):
1714 add_dash_mpd_pr(pl_response)
1717 player_response = {}
1720 embed_webpage = None
1721 if re.search(r'player-age-gate-content">', video_webpage) is not None:
1723 # We simulate the access to the video from www.youtube.com/v/{video_id}
1724 # this can be viewed without login into Youtube
1725 url = proto + '://www.youtube.com/embed/%s' % video_id
1726 embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
1727 data = compat_urllib_parse_urlencode({
1728 'video_id': video_id,
1729 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1730 'sts': self._search_regex(
1731 r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
1733 video_info_url = proto + '://www.youtube.com/get_video_info?' + data
1734 video_info_webpage = self._download_webpage(
1735 video_info_url, video_id,
1736 note='Refetching age-gated info webpage',
1737 errnote='unable to download video info webpage')
1738 video_info = compat_parse_qs(video_info_webpage)
1739 pl_response = video_info.get('player_response', [None])[0]
1740 player_response = extract_player_response(pl_response, video_id)
1741 add_dash_mpd(video_info)
1742 view_count = extract_view_count(video_info)
1747 # Try looking directly into the video webpage
1748 ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
1750 args = ytplayer_config['args']
1751 if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
1752 # Convert to the same format returned by compat_parse_qs
1753 video_info = dict((k, [v]) for k, v in args.items())
1754 add_dash_mpd(video_info)
1755 # Rental video is not rented but preview is available (e.g.
1756 # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
1757 # https://github.com/ytdl-org/youtube-dl/issues/10532)
1758 if not video_info and args.get('ypc_vid'):
1759 return self.url_result(
1760 args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
1761 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1763 sts = ytplayer_config.get('sts')
1764 if not player_response:
1765 player_response = extract_player_response(args.get('player_response'), video_id)
1766 if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
1767 add_dash_mpd_pr(player_response)
1768 # We also try looking in get_video_info since it may contain different dashmpd
1769 # URL that points to a DASH manifest with possibly different itag set (some itags
1770 # are missing from DASH manifest pointed by webpage's dashmpd, some - from DASH
1771 # manifest pointed by get_video_info's dashmpd).
1772 # The general idea is to take a union of itags of both DASH manifests (for example
1773 # video with such 'manifest behavior' see https://github.com/ytdl-org/youtube-dl/issues/6093)
1774 self.report_video_info_webpage_download(video_id)
1775 for el in ('embedded', 'detailpage', 'vevo', ''):
1777 'video_id': video_id,
1787 video_info_webpage = self._download_webpage(
1788 '%s://www.youtube.com/get_video_info' % proto,
1789 video_id, note=False,
1790 errnote='unable to download video info webpage',
1791 fatal=False, query=query)
1792 if not video_info_webpage:
1794 get_video_info = compat_parse_qs(video_info_webpage)
1795 if not player_response:
1796 pl_response = get_video_info.get('player_response', [None])[0]
1797 player_response = extract_player_response(pl_response, video_id)
1798 add_dash_mpd(get_video_info)
1799 if view_count is None:
1800 view_count = extract_view_count(get_video_info)
1802 video_info = get_video_info
1803 get_token = extract_token(get_video_info)
1805 # Different get_video_info requests may report different results, e.g.
1806 # some may report video unavailability, but some may serve it without
1807 # any complaint (see https://github.com/ytdl-org/youtube-dl/issues/7362,
1808 # the original webpage as well as el=info and el=embedded get_video_info
1809 # requests report video unavailability due to geo restriction while
1810 # el=detailpage succeeds and returns valid data). This is probably
1811 # due to YouTube measures against IP ranges of hosting providers.
1812 # Working around by preferring the first succeeded video_info containing
1813 # the token if no such video_info yet was found.
1814 token = extract_token(video_info)
1816 video_info = get_video_info
1819 def extract_unavailable_message():
1821 for tag, kind in (('h1', 'message'), ('div', 'submessage')):
1822 msg = self._html_search_regex(
1823 r'(?s)<{tag}[^>]+id=["\']unavailable-{kind}["\'][^>]*>(.+?)</{tag}>'.format(tag=tag, kind=kind),
1824 video_webpage, 'unavailable %s' % kind, default=None)
1826 messages.append(msg)
1828 return '\n'.join(messages)
1831 unavailable_message = extract_unavailable_message()
1832 if not unavailable_message:
1833 unavailable_message = 'Unable to extract video data'
1834 raise ExtractorError(
1835 'YouTube said: %s' % unavailable_message, expected=True, video_id=video_id)
1837 video_details = try_get(
1838 player_response, lambda x: x['videoDetails'], dict) or {}
1840 video_title = video_info.get('title', [None])[0] or video_details.get('title')
1842 self._downloader.report_warning('Unable to extract video title')
1845 description_original = video_description = get_element_by_id("eow-description", video_webpage)
1846 if video_description:
1849 redir_url = compat_urlparse.urljoin(url, m.group(1))
1850 parsed_redir_url = compat_urllib_parse_urlparse(redir_url)
1851 if re.search(r'^(?:www\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)$', parsed_redir_url.netloc) and parsed_redir_url.path == '/redirect':
1852 qs = compat_parse_qs(parsed_redir_url.query)
1858 description_original = video_description = re.sub(r'''(?x)
1860 (?:[a-zA-Z-]+="[^"]*"\s+)*?
1861 (?:title|href)="([^"]+)"\s+
1862 (?:[a-zA-Z-]+="[^"]*"\s+)*?
1866 ''', replace_url, video_description)
1867 video_description = clean_html(video_description)
1869 video_description = self._html_search_meta('description', video_webpage) or video_details.get('shortDescription')
1871 if not smuggled_data.get('force_singlefeed', False):
1872 if not self._downloader.params.get('noplaylist'):
1873 multifeed_metadata_list = try_get(
1875 lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
1876 compat_str) or try_get(
1877 video_info, lambda x: x['multifeed_metadata_list'][0], compat_str)
1878 if multifeed_metadata_list:
1881 for feed in multifeed_metadata_list.split(','):
1882 # Unquote should take place before split on comma (,) since textual
1883 # fields may contain comma as well (see
1884 # https://github.com/ytdl-org/youtube-dl/issues/8536)
1885 feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
1887 '_type': 'url_transparent',
1888 'ie_key': 'Youtube',
1890 '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
1891 {'force_singlefeed': True}),
1892 'title': '%s (%s)' % (video_title, feed_data['title'][0]),
1894 feed_ids.append(feed_data['id'][0])
1896 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
1897 % (', '.join(feed_ids), video_id))
1898 return self.playlist_result(entries, video_id, video_title, video_description)
1900 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
1902 if view_count is None:
1903 view_count = extract_view_count(video_info)
1904 if view_count is None and video_details:
1905 view_count = int_or_none(video_details.get('viewCount'))
1908 is_live = bool_or_none(video_details.get('isLive'))
1910 # Check for "rental" videos
1911 if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
1912 raise ExtractorError('"rental" videos not supported. See https://github.com/ytdl-org/youtube-dl/issues/359 for more information.', expected=True)
1914 def _extract_filesize(media_url):
1915 return int_or_none(self._search_regex(
1916 r'\bclen[=/](\d+)', media_url, 'filesize', default=None))
1918 streaming_formats = try_get(player_response, lambda x: x['streamingData']['formats'], list) or []
1919 streaming_formats.extend(try_get(player_response, lambda x: x['streamingData']['adaptiveFormats'], list) or [])
1921 if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1922 self.report_rtmp_download()
1924 'format_id': '_rtmp',
1926 'url': video_info['conn'][0],
1927 'player_url': player_url,
1929 elif not is_live and (streaming_formats or len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1):
1930 encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
1931 if 'rtmpe%3Dyes' in encoded_url_map:
1932 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/ytdl-org/youtube-dl/issues/343 for more information.', expected=True)
1935 fmt_list = video_info.get('fmt_list', [''])[0]
1937 for fmt in fmt_list.split(','):
1938 spec = fmt.split('/')
1940 width_height = spec[1].split('x')
1941 if len(width_height) == 2:
1942 formats_spec[spec[0]] = {
1943 'resolution': spec[1],
1944 'width': int_or_none(width_height[0]),
1945 'height': int_or_none(width_height[1]),
1947 for fmt in streaming_formats:
1948 itag = str_or_none(fmt.get('itag'))
1951 quality = fmt.get('quality')
1952 quality_label = fmt.get('qualityLabel') or quality
1953 formats_spec[itag] = {
1954 'asr': int_or_none(fmt.get('audioSampleRate')),
1955 'filesize': int_or_none(fmt.get('contentLength')),
1956 'format_note': quality_label,
1957 'fps': int_or_none(fmt.get('fps')),
1958 'height': int_or_none(fmt.get('height')),
1959 # bitrate for itag 43 is always 2147483647
1960 'tbr': float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000) if itag != '43' else None,
1961 'width': int_or_none(fmt.get('width')),
1964 for fmt in streaming_formats:
1965 if fmt.get('drm_families'):
1967 url = url_or_none(fmt.get('url'))
1970 cipher = fmt.get('cipher')
1973 url_data = compat_parse_qs(cipher)
1974 url = url_or_none(try_get(url_data, lambda x: x['url'][0], compat_str))
1979 url_data = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
1981 stream_type = int_or_none(try_get(url_data, lambda x: x['stream_type'][0]))
1982 # Unsupported FORMAT_STREAM_TYPE_OTF
1983 if stream_type == 3:
1986 format_id = fmt.get('itag') or url_data['itag'][0]
1989 format_id = compat_str(format_id)
1992 if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
1993 ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
1994 jsplayer_url_json = self._search_regex(
1996 embed_webpage if age_gate else video_webpage,
1997 'JS player URL (1)', default=None)
1998 if not jsplayer_url_json and not age_gate:
1999 # We need the embed website after all
2000 if embed_webpage is None:
2001 embed_url = proto + '://www.youtube.com/embed/%s' % video_id
2002 embed_webpage = self._download_webpage(
2003 embed_url, video_id, 'Downloading embed webpage')
2004 jsplayer_url_json = self._search_regex(
2005 ASSETS_RE, embed_webpage, 'JS player URL')
2007 player_url = json.loads(jsplayer_url_json)
2008 if player_url is None:
2009 player_url_json = self._search_regex(
2010 r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
2011 video_webpage, 'age gate player URL')
2012 player_url = json.loads(player_url_json)
2014 if 'sig' in url_data:
2015 url += '&signature=' + url_data['sig'][0]
2016 elif 's' in url_data:
2017 encrypted_sig = url_data['s'][0]
2019 if self._downloader.params.get('verbose'):
2020 if player_url is None:
2021 player_version = 'unknown'
2022 player_desc = 'unknown'
2024 if player_url.endswith('swf'):
2025 player_version = self._search_regex(
2026 r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
2027 'flash player', fatal=False)
2028 player_desc = 'flash player %s' % player_version
2030 player_version = self._search_regex(
2031 [r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js',
2032 r'(?:www|player(?:_ias)?)-([^/]+)(?:/[a-z]{2,3}_[A-Z]{2})?/base\.js'],
2034 'html5 player', fatal=False)
2035 player_desc = 'html5 player %s' % player_version
2037 parts_sizes = self._signature_cache_id(encrypted_sig)
2038 self.to_screen('{%s} signature length %s, %s' %
2039 (format_id, parts_sizes, player_desc))
2041 signature = self._decrypt_signature(
2042 encrypted_sig, video_id, player_url, age_gate)
2043 sp = try_get(url_data, lambda x: x['sp'][0], compat_str) or 'signature'
2044 url += '&%s=%s' % (sp, signature)
2045 if 'ratebypass' not in url:
2046 url += '&ratebypass=yes'
2049 'format_id': format_id,
2051 'player_url': player_url,
2053 if format_id in self._formats:
2054 dct.update(self._formats[format_id])
2055 if format_id in formats_spec:
2056 dct.update(formats_spec[format_id])
2058 # Some itags are not included in DASH manifest thus corresponding formats will
2059 # lack metadata (see https://github.com/ytdl-org/youtube-dl/pull/5993).
2060 # Trying to extract metadata from url_encoded_fmt_stream_map entry.
2061 mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
2062 width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
2065 width = int_or_none(fmt.get('width'))
2067 height = int_or_none(fmt.get('height'))
2069 filesize = int_or_none(url_data.get(
2070 'clen', [None])[0]) or _extract_filesize(url)
2072 quality = url_data.get('quality', [None])[0] or fmt.get('quality')
2073 quality_label = url_data.get('quality_label', [None])[0] or fmt.get('qualityLabel')
2075 tbr = (float_or_none(url_data.get('bitrate', [None])[0], 1000)
2076 or float_or_none(fmt.get('bitrate'), 1000)) if format_id != '43' else None
2077 fps = int_or_none(url_data.get('fps', [None])[0]) or int_or_none(fmt.get('fps'))
2080 'filesize': filesize,
2085 'format_note': quality_label or quality,
2087 for key, value in more_fields.items():
2090 type_ = url_data.get('type', [None])[0] or fmt.get('mimeType')
2092 type_split = type_.split(';')
2093 kind_ext = type_split[0].split('/')
2094 if len(kind_ext) == 2:
2096 dct['ext'] = mimetype2ext(type_split[0])
2097 if kind in ('audio', 'video'):
2099 for mobj in re.finditer(
2100 r'(?P<key>[a-zA-Z_-]+)=(?P<quote>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):
2101 if mobj.group('key') == 'codecs':
2102 codecs = mobj.group('val')
2105 dct.update(parse_codecs(codecs))
2106 if dct.get('acodec') == 'none' or dct.get('vcodec') == 'none':
2107 dct['downloader_options'] = {
2108 # Youtube throttles chunks >~10M
2109 'http_chunk_size': 10485760,
2114 url_or_none(try_get(
2116 lambda x: x['streamingData']['hlsManifestUrl'],
2118 or url_or_none(try_get(
2119 video_info, lambda x: x['hlsvp'][0], compat_str)))
2122 m3u8_formats = self._extract_m3u8_formats(
2123 manifest_url, video_id, 'mp4', fatal=False)
2124 for a_format in m3u8_formats:
2125 itag = self._search_regex(
2126 r'/itag/(\d+)/', a_format['url'], 'itag', default=None)
2128 a_format['format_id'] = itag
2129 if itag in self._formats:
2130 dct = self._formats[itag].copy()
2131 dct.update(a_format)
2133 a_format['player_url'] = player_url
2134 # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
2135 a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
2136 formats.append(a_format)
2138 error_message = extract_unavailable_message()
2139 if not error_message:
2140 error_message = clean_html(try_get(
2141 player_response, lambda x: x['playabilityStatus']['reason'],
2143 if not error_message:
2144 error_message = clean_html(
2145 try_get(video_info, lambda x: x['reason'][0], compat_str))
2147 raise ExtractorError(error_message, expected=True)
2148 raise ExtractorError('no conn, hlsvp, hlsManifestUrl or url_encoded_fmt_stream_map information found in video info')
2151 video_uploader = try_get(
2152 video_info, lambda x: x['author'][0],
2153 compat_str) or str_or_none(video_details.get('author'))
2155 video_uploader = compat_urllib_parse_unquote_plus(video_uploader)
2157 self._downloader.report_warning('unable to extract uploader name')
2160 video_uploader_id = None
2161 video_uploader_url = None
2163 r'<link itemprop="url" href="(?P<uploader_url>https?://www\.youtube\.com/(?:user|channel)/(?P<uploader_id>[^"]+))">',
2165 if mobj is not None:
2166 video_uploader_id = mobj.group('uploader_id')
2167 video_uploader_url = mobj.group('uploader_url')
2169 self._downloader.report_warning('unable to extract uploader nickname')
2172 str_or_none(video_details.get('channelId'))
2173 or self._html_search_meta(
2174 'channelId', video_webpage, 'channel id', default=None)
2175 or self._search_regex(
2176 r'data-channel-external-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
2177 video_webpage, 'channel id', default=None, group='id'))
2178 channel_url = 'http://www.youtube.com/channel/%s' % channel_id if channel_id else None
2181 # We try first to get a high quality image:
2182 m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
2183 video_webpage, re.DOTALL)
2184 if m_thumb is not None:
2185 video_thumbnail = m_thumb.group(1)
2186 elif 'thumbnail_url' not in video_info:
2187 self._downloader.report_warning('unable to extract video thumbnail')
2188 video_thumbnail = None
2189 else: # don't panic if we can't find it
2190 video_thumbnail = compat_urllib_parse_unquote_plus(video_info['thumbnail_url'][0])
2193 upload_date = self._html_search_meta(
2194 'datePublished', video_webpage, 'upload date', default=None)
2196 upload_date = self._search_regex(
2197 [r'(?s)id="eow-date.*?>(.*?)</span>',
2198 r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'],
2199 video_webpage, 'upload date', default=None)
2200 upload_date = unified_strdate(upload_date)
2202 video_license = self._html_search_regex(
2203 r'<h4[^>]+class="title"[^>]*>\s*License\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li',
2204 video_webpage, 'license', default=None)
2206 m_music = re.search(
2208 <h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*
2216 \bhref=["\']/red[^>]*>| # drop possible
2217 >\s*Listen ad-free with YouTube Red # YouTube Red ad
2224 video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
2225 video_creator = clean_html(m_music.group('creator'))
2227 video_alt_title = video_creator = None
def extract_meta(field):
    # Look up a labelled metadata row (e.g. Song / Artist / Album) in the
    # watch-page markup; returns None when the row is absent.
    row_re = r'<h4[^>]+class="title"[^>]*>\s*%s\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li>\s*' % field
    return self._html_search_regex(row_re, video_webpage, field, default=None)
2234 track = extract_meta('Song')
2235 artist = extract_meta('Artist')
2236 album = extract_meta('Album')
2238 # Youtube Music Auto-generated description
2239 release_date = release_year = None
2240 if video_description:
2241 mobj = re.search(r'(?s)Provided to YouTube by [^\n]+\n+(?P<track>[^·]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?', video_description)
2244 track = mobj.group('track').strip()
2246 artist = mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·'))
# Fix: .strip() belongs on the extracted value, not on the literal group
# name — `'album'.strip()` was a no-op typo. Strip the album like the
# track and artist fields above, so trailing whitespace from the
# auto-generated description does not leak into metadata.
album = mobj.group('album').strip()
2249 release_year = mobj.group('release_year')
2250 release_date = mobj.group('release_date')
2252 release_date = release_date.replace('-', '')
2253 if not release_year:
2254 release_year = int(release_date[:4])
2256 release_year = int(release_year)
2258 m_episode = re.search(
2259 r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
2262 series = unescapeHTML(m_episode.group('series'))
2263 season_number = int(m_episode.group('season'))
2264 episode_number = int(m_episode.group('episode'))
2266 series = season_number = episode_number = None
2268 m_cat_container = self._search_regex(
2269 r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
2270 video_webpage, 'categories', default=None)
2272 category = self._html_search_regex(
2273 r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
2275 video_categories = None if category is None else [category]
2277 video_categories = None
2280 unescapeHTML(m.group('content'))
2281 for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
def _extract_count(count_name):
    # Read a like/dislike style counter from the sentiment-bar buttons
    # on the watch page; None (hence str_to_int -> None) when absent.
    button_re = (
        r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>'
        % re.escape(count_name))
    raw_count = self._search_regex(
        button_re, video_webpage, count_name, default=None)
    return str_to_int(raw_count)
2289 like_count = _extract_count('like')
2290 dislike_count = _extract_count('dislike')
2292 if view_count is None:
2293 view_count = str_to_int(self._search_regex(
2294 r'<[^>]+class=["\']watch-view-count[^>]+>\s*([\d,\s]+)', video_webpage,
2295 'view count', default=None))
2298 float_or_none(video_details.get('averageRating'))
2299 or try_get(video_info, lambda x: float_or_none(x['avg_rating'][0])))
2302 video_subtitles = self.extract_subtitles(video_id, video_webpage)
2303 automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
2305 video_duration = try_get(
2306 video_info, lambda x: int_or_none(x['length_seconds'][0]))
2307 if not video_duration:
2308 video_duration = int_or_none(video_details.get('lengthSeconds'))
2309 if not video_duration:
2310 video_duration = parse_duration(self._html_search_meta(
2311 'duration', video_webpage, 'video duration'))
2314 video_annotations = None
2315 if self._downloader.params.get('writeannotations', False):
2316 xsrf_token = self._search_regex(
2317 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>[A-Za-z0-9+/=]+)\2',
2318 video_webpage, 'xsrf token', group='xsrf_token', fatal=False)
2319 invideo_url = try_get(
2320 player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
2321 if xsrf_token and invideo_url:
2322 xsrf_field_name = self._search_regex(
2323 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
2324 video_webpage, 'xsrf field name',
2325 group='xsrf_field_name', default='session_token')
2326 video_annotations = self._download_webpage(
2327 self._proto_relative_url(invideo_url),
2328 video_id, note='Downloading annotations',
2329 errnote='Unable to download video annotations', fatal=False,
2330 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
2332 chapters = self._extract_chapters(description_original, video_duration)
2334 # Look for the DASH manifest
2335 if self._downloader.params.get('youtube_include_dash_manifest', True):
2336 dash_mpd_fatal = True
2337 for mpd_url in dash_mpds:
2340 def decrypt_sig(mobj):
2342 dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
2343 return '/signature/%s' % dec_s
2345 mpd_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, mpd_url)
2347 for df in self._extract_mpd_formats(
2348 mpd_url, video_id, fatal=dash_mpd_fatal,
2349 formats_dict=self._formats):
2350 if not df.get('filesize'):
2351 df['filesize'] = _extract_filesize(df['url'])
2352 # Do not overwrite DASH format found in some previous DASH manifest
2353 if df['format_id'] not in dash_formats:
2354 dash_formats[df['format_id']] = df
2355 # Additional DASH manifests may end up in HTTP Error 403 therefore
2356 # allow them to fail without bug report message if we already have
2357 # some DASH manifest succeeded. This is temporary workaround to reduce
2358 # burst of bug reports until we figure out the reason and whether it
2359 # can be fixed at all.
2360 dash_mpd_fatal = False
2361 except (ExtractorError, KeyError) as e:
2362 self.report_warning(
2363 'Skipping DASH manifest: %r' % e, video_id)
2365 # Remove the formats we found through non-DASH, they
2366 # contain less info and it can be wrong, because we use
2367 # fixed values (for example the resolution). See
2368 # https://github.com/ytdl-org/youtube-dl/issues/5774 for an
2370 formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
2371 formats.extend(dash_formats.values())
2373 # Check for malformed aspect ratio
2374 stretched_m = re.search(
2375 r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
2378 w = float(stretched_m.group('w'))
2379 h = float(stretched_m.group('h'))
2380 # yt:stretch may hold invalid ratio data (e.g. for Q39EVAstoRM ratio is 17:0).
2381 # We will only process correct ratios.
2385 if f.get('vcodec') != 'none':
2386 f['stretched_ratio'] = ratio
2389 token = extract_token(video_info)
2391 if 'reason' in video_info:
2392 if 'The uploader has not made this video available in your country.' in video_info['reason']:
2393 regions_allowed = self._html_search_meta(
2394 'regionsAllowed', video_webpage, default=None)
2395 countries = regions_allowed.split(',') if regions_allowed else None
2396 self.raise_geo_restricted(
2397 msg=video_info['reason'][0], countries=countries)
2398 reason = video_info['reason'][0]
2399 if 'Invalid parameters' in reason:
2400 unavailable_message = extract_unavailable_message()
2401 if unavailable_message:
2402 reason = unavailable_message
2403 raise ExtractorError(
2404 'YouTube said: %s' % reason,
2405 expected=True, video_id=video_id)
2407 raise ExtractorError(
2408 '"token" parameter not in video info for unknown reason',
2411 if not formats and (video_info.get('license_info') or try_get(player_response, lambda x: x['streamingData']['licenseInfos'])):
2412 raise ExtractorError('This video is DRM protected.', expected=True)
2414 self._sort_formats(formats)
2416 self.mark_watched(video_id, video_info, player_response)
2420 'uploader': video_uploader,
2421 'uploader_id': video_uploader_id,
2422 'uploader_url': video_uploader_url,
2423 'channel_id': channel_id,
2424 'channel_url': channel_url,
2425 'upload_date': upload_date,
2426 'license': video_license,
2427 'creator': video_creator or artist,
2428 'title': video_title,
2429 'alt_title': video_alt_title or track,
2430 'thumbnail': video_thumbnail,
2431 'description': video_description,
2432 'categories': video_categories,
2434 'subtitles': video_subtitles,
2435 'automatic_captions': automatic_captions,
2436 'duration': video_duration,
2437 'age_limit': 18 if age_gate else 0,
2438 'annotations': video_annotations,
2439 'chapters': chapters,
2440 'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
2441 'view_count': view_count,
2442 'like_count': like_count,
2443 'dislike_count': dislike_count,
2444 'average_rating': average_rating,
2447 'start_time': start_time,
2448 'end_time': end_time,
2450 'season_number': season_number,
2451 'episode_number': episode_number,
2455 'release_date': release_date,
2456 'release_year': release_year,
2460 class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
2461 IE_DESC = 'YouTube.com playlists'
2462 _VALID_URL = r"""(?x)(?:
2472 (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/(?:videoseries|[0-9A-Za-z_-]{11}))
2473 \? (?:.*?[&;])*? (?:p|a|list)=
2476 youtu\.be/[0-9A-Za-z_-]{11}\?.*?\blist=
2479 (?:PL|LL|EC|UU|FL|RD|UL|TL|OLAK5uy_)?[0-9A-Za-z-_]{10,}
2480 # Top tracks, they can also include dots
2486 )""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
2487 _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
2488 _VIDEO_RE_TPL = r'href="\s*/watch\?v=%s(?:&(?:[^"]*?index=(?P<index>\d+))?(?:[^>]+>(?P<title>[^<]+))?)?'
2489 _VIDEO_RE = _VIDEO_RE_TPL % r'(?P<id>[0-9A-Za-z_-]{11})'
2490 IE_NAME = 'youtube:playlist'
2492 'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
2494 'title': 'ytdl test PL',
2495 'id': 'PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
2497 'playlist_count': 3,
2499 'url': 'https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
2501 'id': 'PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
2502 'title': 'YDL_Empty_List',
2504 'playlist_count': 0,
2505 'skip': 'This playlist is private',
2507 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
2508 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2510 'title': '29C3: Not my department',
2511 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2512 'uploader': 'Christiaan008',
2513 'uploader_id': 'ChRiStIaAn008',
2515 'playlist_count': 95,
2517 'note': 'issue #673',
2518 'url': 'PLBB231211A4F62143',
2520 'title': '[OLD]Team Fortress 2 (Class-based LP)',
2521 'id': 'PLBB231211A4F62143',
2522 'uploader': 'Wickydoo',
2523 'uploader_id': 'Wickydoo',
2525 'playlist_mincount': 26,
2527 'note': 'Large playlist',
2528 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
2530 'title': 'Uploads from Cauchemar',
2531 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
2532 'uploader': 'Cauchemar',
2533 'uploader_id': 'Cauchemar89',
2535 'playlist_mincount': 799,
2537 'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
2539 'title': 'YDL_safe_search',
2540 'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
2542 'playlist_count': 2,
2543 'skip': 'This playlist is private',
2546 'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
2547 'playlist_count': 4,
2550 'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
2551 'uploader': 'milan',
2552 'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
2555 'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
2556 'playlist_mincount': 485,
2558 'title': '2018 Chinese New Singles (11/6 updated)',
2559 'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
2561 'uploader_id': 'sdragonfang',
2564 'note': 'Embedded SWF player',
2565 'url': 'https://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
2566 'playlist_count': 4,
2569 'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ',
2571 'skip': 'This playlist does not exist',
2573 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
2574 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
2576 'title': 'Uploads from Interstellar Movie',
2577 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
2578 'uploader': 'Interstellar Movie',
2579 'uploader_id': 'InterstellarMovie1',
2581 'playlist_mincount': 21,
2583 # Playlist URL that does not actually serve a playlist
2584 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
2586 'id': 'FqZTN594JQw',
2588 'title': "Smiley's People 01 detective, Adventure Series, Action",
2589 'uploader': 'STREEM',
2590 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
2591 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
2592 'upload_date': '20150526',
2593 'license': 'Standard YouTube License',
2594 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
2595 'categories': ['People & Blogs'],
2599 'dislike_count': int,
2602 'skip_download': True,
2604 'skip': 'This video is not available.',
2605 'add_ie': [YoutubeIE.ie_key()],
2607 'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
2609 'id': 'yeWKywCrFtk',
2611 'title': 'Small Scale Baler and Braiding Rugs',
2612 'uploader': 'Backus-Page House Museum',
2613 'uploader_id': 'backuspagemuseum',
2614 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
2615 'upload_date': '20161008',
2616 'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
2617 'categories': ['Nonprofits & Activism'],
2620 'dislike_count': int,
2624 'skip_download': True,
2627 # https://github.com/ytdl-org/youtube-dl/issues/21844
2628 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2630 'title': 'Data Analysis with Dr Mike Pound',
2631 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2632 'uploader_id': 'Computerphile',
2633 'uploader': 'Computerphile',
2635 'playlist_mincount': 11,
2637 'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
2638 'only_matching': True,
2640 'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
2641 'only_matching': True,
2643 # music album playlist
2644 'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
2645 'only_matching': True,
2647 'url': 'https://invidio.us/playlist?list=PLDIoUOhQQPlXr63I_vwF9GD8sAKh77dWU',
2648 'only_matching': True,
2651 def _real_initialize(self):
2654 def extract_videos_from_page(self, page):
2658 for item in re.findall(
2659 r'(<[^>]*\bdata-video-id\s*=\s*["\'][0-9A-Za-z_-]{11}[^>]+>)', page):
2660 attrs = extract_attributes(item)
2661 video_id = attrs['data-video-id']
2662 video_title = unescapeHTML(attrs.get('data-title'))
2664 video_title = video_title.strip()
2665 ids_in_page.append(video_id)
2666 titles_in_page.append(video_title)
2668 # Fallback with old _VIDEO_RE
2669 self.extract_videos_from_page_impl(
2670 self._VIDEO_RE, page, ids_in_page, titles_in_page)
2673 self.extract_videos_from_page_impl(
2674 r'href="\s*/watch\?v\s*=\s*(?P<id>[0-9A-Za-z_-]{11})', page,
2675 ids_in_page, titles_in_page)
2676 self.extract_videos_from_page_impl(
2677 r'data-video-ids\s*=\s*["\'](?P<id>[0-9A-Za-z_-]{11})', page,
2678 ids_in_page, titles_in_page)
2680 return zip(ids_in_page, titles_in_page)
2682 def _extract_mix(self, playlist_id):
2683 # The mixes are generated from a single video
2684 # the id of the playlist is just 'RD' + video_id
2686 last_id = playlist_id[-11:]
2687 for n in itertools.count(1):
2688 url = 'https://youtube.com/watch?v=%s&list=%s' % (last_id, playlist_id)
2689 webpage = self._download_webpage(
2690 url, playlist_id, 'Downloading page {0} of Youtube mix'.format(n))
2691 new_ids = orderedSet(re.findall(
2692 r'''(?xs)data-video-username=".*?".*?
2693 href="/watch\?v=([0-9A-Za-z_-]{11})&[^"]*?list=%s''' % re.escape(playlist_id),
2695 # Fetch new pages until all the videos are repeated, it seems that
2696 # there are always 51 unique videos.
2697 new_ids = [_id for _id in new_ids if _id not in ids]
2703 url_results = self._ids_to_results(ids)
2705 search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage)
2707 search_title('playlist-title')
2708 or search_title('title long-title')
2709 or search_title('title'))
2710 title = clean_html(title_span)
2712 return self.playlist_result(url_results, playlist_id, title)
2714 def _extract_playlist(self, playlist_id):
2715 url = self._TEMPLATE_URL % playlist_id
2716 page = self._download_webpage(url, playlist_id)
2718 # the yt-alert-message now has tabindex attribute (see https://github.com/ytdl-org/youtube-dl/issues/11604)
2719 for match in re.findall(r'<div class="yt-alert-message"[^>]*>([^<]+)</div>', page):
2720 match = match.strip()
2721 # Check if the playlist exists or is private
2722 mobj = re.match(r'[^<]*(?:The|This) playlist (?P<reason>does not exist|is private)[^<]*', match)
2724 reason = mobj.group('reason')
2725 message = 'This playlist %s' % reason
2726 if 'private' in reason:
2727 message += ', use --username or --netrc to access it'
2729 raise ExtractorError(message, expected=True)
2730 elif re.match(r'[^<]*Invalid parameters[^<]*', match):
2731 raise ExtractorError(
2732 'Invalid parameters. Maybe URL is incorrect.',
2734 elif re.match(r'[^<]*Choose your language[^<]*', match):
2737 self.report_warning('Youtube gives an alert message: ' + match)
2739 playlist_title = self._html_search_regex(
2740 r'(?s)<h1 class="pl-header-title[^"]*"[^>]*>\s*(.*?)\s*</h1>',
2741 page, 'title', default=None)
2743 _UPLOADER_BASE = r'class=["\']pl-header-details[^>]+>\s*<li>\s*<a[^>]+\bhref='
2744 uploader = self._html_search_regex(
2745 r'%s["\']/(?:user|channel)/[^>]+>([^<]+)' % _UPLOADER_BASE,
2746 page, 'uploader', default=None)
2748 r'%s(["\'])(?P<path>/(?:user|channel)/(?P<uploader_id>.+?))\1' % _UPLOADER_BASE,
2751 uploader_id = mobj.group('uploader_id')
2752 uploader_url = compat_urlparse.urljoin(url, mobj.group('path'))
2754 uploader_id = uploader_url = None
2758 if not playlist_title:
2760 # Some playlist URLs don't actually serve a playlist (e.g.
2761 # https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4)
2762 next(self._entries(page, playlist_id))
2763 except StopIteration:
2766 playlist = self.playlist_result(
2767 self._entries(page, playlist_id), playlist_id, playlist_title)
2769 'uploader': uploader,
2770 'uploader_id': uploader_id,
2771 'uploader_url': uploader_url,
2774 return has_videos, playlist
2776 def _check_download_just_video(self, url, playlist_id):
2777 # Check if it's a video-specific URL
2778 query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
2779 video_id = query_dict.get('v', [None])[0] or self._search_regex(
2780 r'(?:(?:^|//)youtu\.be/|youtube\.com/embed/(?!videoseries))([0-9A-Za-z_-]{11})', url,
2781 'video id', default=None)
2783 if self._downloader.params.get('noplaylist'):
2784 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
2785 return video_id, self.url_result(video_id, 'Youtube', video_id=video_id)
2787 self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
2788 return video_id, None
2791 def _real_extract(self, url):
2792 # Extract playlist id
2793 mobj = re.match(self._VALID_URL, url)
2795 raise ExtractorError('Invalid URL: %s' % url)
2796 playlist_id = mobj.group(1) or mobj.group(2)
2798 video_id, video = self._check_download_just_video(url, playlist_id)
2802 if playlist_id.startswith(('RD', 'UL', 'PU')):
2803 # Mixes require a custom extraction process
2804 return self._extract_mix(playlist_id)
2806 has_videos, playlist = self._extract_playlist(playlist_id)
2807 if has_videos or not video_id:
2810 # Some playlist URLs don't actually serve a playlist (see
2811 # https://github.com/ytdl-org/youtube-dl/issues/10537).
2812 # Fallback to plain video extraction if there is a video id
2813 # along with playlist id.
2814 return self.url_result(video_id, 'Youtube', video_id=video_id)
2817 class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
2818 IE_DESC = 'YouTube.com channels'
2819 _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com|(?:www\.)?invidio\.us)/channel/(?P<id>[0-9A-Za-z_-]+)'
2820 _TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos'
2821 _VIDEO_RE = r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?'
2822 IE_NAME = 'youtube:channel'
2824 'note': 'paginated channel',
2825 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
2826 'playlist_mincount': 91,
2828 'id': 'UUKfVa3S1e4PHvxWcwyMMg8w',
2829 'title': 'Uploads from lex will',
2830 'uploader': 'lex will',
2831 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2834 'note': 'Age restricted channel',
2835 # from https://www.youtube.com/user/DeusExOfficial
2836 'url': 'https://www.youtube.com/channel/UCs0ifCMCm1icqRbqhUINa0w',
2837 'playlist_mincount': 64,
2839 'id': 'UUs0ifCMCm1icqRbqhUINa0w',
2840 'title': 'Uploads from Deus Ex',
2841 'uploader': 'Deus Ex',
2842 'uploader_id': 'DeusExOfficial',
2845 'url': 'https://invidio.us/channel/UC23qupoDRn9YOAVzeoxjOQA',
2846 'only_matching': True,
def suitable(cls, url):
    # Yield to the more specific extractors first so that channel
    # playlists pages and live URLs are not swallowed by the generic
    # channel extractor.
    if YoutubePlaylistsIE.suitable(url) or YoutubeLiveIE.suitable(url):
        return False
    return super(YoutubeChannelIE, cls).suitable(url)
def _build_template_url(self, url, channel_id):
    """Return the canonical videos-listing URL for *channel_id*.

    *url* is accepted for the benefit of subclasses that derive the
    template from the original URL; it is unused here.
    """
    return self._TEMPLATE_URL % (channel_id,)
def _real_extract(self, url):
    """Extract all videos of a channel.

    Prefer redirecting to the channel's uploads playlist (UU...) when a
    playlist id can be discovered on the channel page; otherwise fall
    back to paging through the channel's video listing.
    """
    channel_id = self._match_id(url)

    url = self._build_template_url(url, channel_id)

    # Channel by page listing is restricted to 35 pages of 30 items, i.e. 1050 videos total (see #5778)
    # Workaround by extracting as a playlist if managed to obtain channel playlist URL
    # otherwise fallback on channel by page extraction
    channel_page = self._download_webpage(
        url + '?view=57', channel_id,
        'Downloading channel page', fatal=False)
    if channel_page is False:
        channel_playlist_id = False
    else:
        channel_playlist_id = self._html_search_meta(
            'channelId', channel_page, 'channel id', default=None)
        if not channel_playlist_id:
            # Fall back to the app-deeplink meta tags which embed the id.
            channel_url = self._html_search_meta(
                ('al:ios:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad'),
                channel_page, 'channel url', default=None)
            if channel_url:
                channel_playlist_id = self._search_regex(
                    r'vnd\.youtube://user/([0-9A-Za-z_-]+)',
                    channel_url, 'channel id', default=None)
    if channel_playlist_id and channel_playlist_id.startswith('UC'):
        # UCxxxx (channel id) maps to UUxxxx (uploads playlist id).
        playlist_id = 'UU' + channel_playlist_id[2:]
        return self.url_result(
            compat_urlparse.urljoin(url, '/playlist?list=%s' % playlist_id), 'YoutubePlaylist')

    channel_page = self._download_webpage(url, channel_id, 'Downloading page #1')
    autogenerated = re.search(r'''(?x)
            class="[^"]*?(?:
                channel-header-autogenerated-label|
                yt-channel-title-autogenerated
            )[^"]*"''', channel_page) is not None

    if autogenerated:
        # The videos are contained in a single page
        # the ajax pages can't be used, they are empty
        entries = [
            self.url_result(
                video_id, 'Youtube', video_id=video_id,
                video_title=video_title)
            for video_id, video_title in self.extract_videos_from_page(channel_page)]
        return self.playlist_result(entries, channel_id)

    try:
        next(self._entries(channel_page, channel_id))
    except StopIteration:
        # No entries at all: surface YouTube's own error message if present.
        alert_message = self._html_search_regex(
            r'(?s)<div[^>]+class=(["\']).*?\byt-alert-message\b.*?\1[^>]*>(?P<alert>[^<]+)</div>',
            channel_page, 'alert', default=None, group='alert')
        if alert_message:
            raise ExtractorError('Youtube said: %s' % alert_message, expected=True)

    return self.playlist_result(self._entries(channel_page, channel_id), channel_id)
class YoutubeUserIE(YoutubeChannelIE):
    IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:(?P<user>user|c)/)?(?!(?:attribution_link|watch|results|shared)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/%s/%s/videos'
    IE_NAME = 'youtube:user'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheLinuxFoundation',
        'playlist_mincount': 320,
        'info_dict': {
            'id': 'UUfX55Sx5hEFjoC3cNs6mCUQ',
            'title': 'Uploads from The Linux Foundation',
            'uploader': 'The Linux Foundation',
            'uploader_id': 'TheLinuxFoundation',
        },
    }, {
        # Only available via https://www.youtube.com/c/12minuteathlete/videos
        # but not https://www.youtube.com/user/12minuteathlete/videos
        'url': 'https://www.youtube.com/c/12minuteathlete/videos',
        'playlist_mincount': 249,
        'info_dict': {
            'id': 'UUVjM-zV6_opMDx7WYxnjZiQ',
            'title': 'Uploads from 12 Minute Athlete',
            'uploader': '12 Minute Athlete',
            'uploader_id': 'the12minuteathlete',
        },
    }, {
        'url': 'ytuser:phihag',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/gametrailers',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/gametrailers',
        'only_matching': True,
    }, {
        # This channel is not available, geo restricted to JP
        'url': 'https://www.youtube.com/user/kananishinoSMEJ/videos',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        # Don't return True if the url can be extracted with another youtube
        # extractor: this regex is too permissive and would match those URLs too.
        other_yt_ies = iter(klass for (name, klass) in globals().items() if name.startswith('Youtube') and name.endswith('IE') and klass is not cls)
        if any(ie.suitable(url) for ie in other_yt_ies):
            return False
        else:
            return super(YoutubeUserIE, cls).suitable(url)

    def _build_template_url(self, url, channel_id):
        """Rebuild the videos URL preserving the user/c path prefix from *url*."""
        mobj = re.match(self._VALID_URL, url)
        return self._TEMPLATE_URL % (mobj.group('user') or 'user', mobj.group('id'))
class YoutubeLiveIE(YoutubeBaseInfoExtractor):
    IE_DESC = 'YouTube.com live streams'
    _VALID_URL = r'(?P<base_url>https?://(?:\w+\.)?youtube\.com/(?:(?:user|channel|c)/)?(?P<id>[^/]+))/live'
    IE_NAME = 'youtube:live'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheYoungTurks/live',
        'info_dict': {
            'id': 'a48o2S1cPoo',
            'ext': 'mp4',
            'title': 'The Young Turks - Live Main Show',
            'uploader': 'The Young Turks',
            'uploader_id': 'TheYoungTurks',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
            'upload_date': '20150715',
            'license': 'Standard YouTube License',
            'description': 'md5:438179573adcdff3c97ebb1ee632b891',
            'categories': ['News & Politics'],
            'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/TheYoungTurks/live',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Resolve a /live URL to the live video if one is on air, else the channel."""
        mobj = re.match(self._VALID_URL, url)
        channel_id = mobj.group('id')
        base_url = mobj.group('base_url')
        webpage = self._download_webpage(url, channel_id, fatal=False)
        if webpage:
            page_type = self._og_search_property(
                'type', webpage, 'page type', default='')
            video_id = self._html_search_meta(
                'videoId', webpage, 'video id', default=None)
            # Only hand off to the video extractor when the page really is a
            # video page with a well-formed 11-character video id.
            if page_type.startswith('video') and video_id and re.match(
                    r'^[0-9A-Za-z_-]{11}$', video_id):
                return self.url_result(video_id, YoutubeIE.ie_key())
        return self.url_result(base_url)
class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
    IE_DESC = 'YouTube.com user/channel playlists'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/(?:user|channel)/(?P<id>[^/]+)/playlists'
    IE_NAME = 'youtube:playlists'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
        'playlist_mincount': 4,
        'info_dict': {
            'id': 'ThirstForScience',
            'title': 'ThirstForScience',
        },
    }, {
        # with "Load more" button
        'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
        'playlist_mincount': 70,
        'info_dict': {
            'id': 'igorkle1',
            'title': 'Игорь Клейнер',
        },
    }, {
        'url': 'https://www.youtube.com/channel/UCiU1dHvZObB2iP6xkJ__Icw/playlists',
        'playlist_mincount': 17,
        'info_dict': {
            'id': 'UCiU1dHvZObB2iP6xkJ__Icw',
            'title': 'Chem Player',
        },
    }]
class YoutubeSearchBaseInfoExtractor(YoutubePlaylistBaseInfoExtractor):
    """Shared video-link regex for the search result page extractors."""
    # Captures the 11-char video id and, when present, the link title attribute.
    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})(?:[^"]*"[^>]+\btitle="(?P<title>[^"]+))?'
class YoutubeSearchIE(SearchInfoExtractor, YoutubeSearchBaseInfoExtractor):
    IE_DESC = 'YouTube.com searches'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    _EXTRA_QUERY_ARGS = {}
    _TESTS = []

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""

        videos = []
        limit = n

        url_query = {
            'search_query': query.encode('utf-8'),
        }
        url_query.update(self._EXTRA_QUERY_ARGS)
        result_url = 'https://www.youtube.com/results?' + compat_urllib_parse_urlencode(url_query)

        for pagenum in itertools.count(1):
            data = self._download_json(
                result_url, video_id='query "%s"' % query,
                note='Downloading page %s' % pagenum,
                errnote='Unable to download API page',
                query={'spf': 'navigate'})
            html_content = data[1]['body']['content']

            if 'class="search-message' in html_content:
                raise ExtractorError(
                    '[youtube] No video results', expected=True)

            new_videos = list(self._process_page(html_content))
            videos += new_videos
            # Stop when a page yields nothing new or we have enough results.
            if not new_videos or len(videos) > limit:
                break
            next_link = self._html_search_regex(
                r'href="(/results\?[^"]*\bsp=[^"]+)"[^>]*>\s*<span[^>]+class="[^"]*\byt-uix-button-content\b[^"]*"[^>]*>Next',
                html_content, 'next link', default=None)
            if next_link is None:
                break
            result_url = compat_urlparse.urljoin('https://www.youtube.com/', next_link)

        if len(videos) > n:
            videos = videos[:n]

        return self.playlist_result(videos, query)
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Same as the regular search, but results are ordered by upload date."""
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    _SEARCH_KEY = 'ytsearchdate'
    IE_DESC = 'YouTube.com searches, newest videos first'
    _EXTRA_QUERY_ARGS = {'search_sort': 'video_date_uploaded'}
class YoutubeSearchURLIE(YoutubeSearchBaseInfoExtractor):
    IE_DESC = 'YouTube.com search URLs'
    IE_NAME = 'youtube:search_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        },
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Extract results directly from a search results page URL."""
        mobj = re.match(self._VALID_URL, url)
        query = compat_urllib_parse_unquote_plus(mobj.group('query'))
        webpage = self._download_webpage(url, query)
        return self.playlist_result(self._process_page(webpage), playlist_title=query)
class YoutubeShowIE(YoutubePlaylistsBaseInfoExtractor):
    IE_DESC = 'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/show/(?P<id>[^?#]*)'
    IE_NAME = 'youtube:show'
    _TESTS = [{
        'url': 'https://www.youtube.com/show/airdisasters',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'airdisasters',
            'title': 'Air Disasters',
        },
    }]

    def _real_extract(self, url):
        """Delegate to the playlists extractor on the show's /playlists page."""
        playlist_id = self._match_id(url)
        return super(YoutubeShowIE, self)._real_extract(
            'https://www.youtube.com/show/%s/playlists' % playlist_id)
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
    """
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        return 'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        self._login()

    def _entries(self, page):
        # The extraction process is the same as for playlists, but the regex
        # for the video ids doesn't contain an index
        ids = []
        more_widget_html = content_html = page
        for page_num in itertools.count(1):
            matches = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)

            # 'recommended' feed has infinite 'load more' and each new portion spins
            # the same videos in (sometimes) slightly different order, so we'll check
            # for unicity and break when portion has no new videos
            new_ids = list(filter(lambda video_id: video_id not in ids, orderedSet(matches)))
            if not new_ids:
                break

            ids.extend(new_ids)

            for entry in self._ids_to_results(new_ids):
                yield entry

            mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
            if not mobj:
                break

            more = self._download_json(
                'https://youtube.com/%s' % mobj.group('more'), self._PLAYLIST_TITLE,
                'Downloading page #%s' % page_num,
                transform_source=uppercase_escape)
            content_html = more['content_html']
            more_widget_html = more['load_more_widget_html']

    def _real_extract(self, url):
        page = self._download_webpage(
            'https://www.youtube.com/feed/%s' % self._FEED_NAME,
            self._PLAYLIST_TITLE)
        return self.playlist_result(
            self._entries(page), playlist_title=self._PLAYLIST_TITLE)
class YoutubeWatchLaterIE(YoutubePlaylistIE):
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:feed/watch_later|(?:playlist|watch)\?(?:.+&)?list=WL)|:ytwatchlater'

    _TESTS = [{
        'url': 'https://www.youtube.com/playlist?list=WL',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?v=bCNU9TrbiRk&index=1&list=WL',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Extract the watch-later list, or just the video if the URL points at one."""
        _, video = self._check_download_just_video(url, 'WL')
        if video:
            return video
        _, playlist = self._extract_playlist('WL')
        return playlist
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
    _LOGIN_REQUIRED = True

    def _real_extract(self, url):
        """Resolve the logged-in user's favourites page to its backing playlist."""
        webpage = self._download_webpage('https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        playlist_id = self._search_regex(r'list=(.+?)["&]', webpage, 'favourites playlist id')
        return self.url_result(playlist_id, 'YoutubePlaylist')
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Logged-in user's recommended-videos feed."""
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/recommended|:ytrec(?:ommended)?'
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = 'Youtube Recommended videos'
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Logged-in user's subscriptions feed."""
    IE_DESC = 'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = 'Youtube Subscriptions'
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Logged-in user's watch-history feed."""
    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/history|:ythistory'
    _FEED_NAME = 'history'
    _PLAYLIST_TITLE = 'Youtube History'
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catches watch URLs whose v= parameter was lost (typically an unquoted
    '&' in the shell) and raises a helpful error instead of failing obscurely."""
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like  youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply  youtube-dl BaW_jenozKc .',
            expected=True)
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catches watch URLs whose video id is shorter than the required 11
    characters and raises a clear 'looks truncated' error."""
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        raise ExtractorError(
            'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url),
            expected=True)