3 from __future__ import unicode_literals
14 from .common import InfoExtractor, SearchInfoExtractor
15 from ..jsinterp import JSInterpreter
16 from ..swfinterp import SWFInterpreter
17 from ..compat import (
21 compat_urllib_parse_unquote,
22 compat_urllib_parse_unquote_plus,
23 compat_urllib_parse_urlencode,
24 compat_urllib_parse_urlparse,
33 get_element_by_attribute,
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""

    # Google account sign-in endpoints used by the _login() flow below.
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'
    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
    # Template URL; formatted with the "TL" token extracted from the
    # challenge response (see the TWO_STEP_VERIFICATION branch of _login).
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

    # Machine name for credential lookup (presumably consumed by
    # _get_login_info in the base class — confirm against InfoExtractor).
    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    # Known playlist-id prefixes; also interpolated into YoutubeIE._VALID_URL.
    _PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL|OLAK5uy_)[0-9A-Za-z-_]{10,}'
    def _set_language(self):
        # Force the English UI via the PREF cookie so page text is stable for
        # the extractors.  NOTE(review): the call that sets the cookie
        # (presumably self._set_cookie) is on a line not visible in this chunk.
            '.youtube.com', 'PREF', 'f1=50000000&hl=en',
            # YouTube sets the expire time to about two months
            expire_time=time.time() + 2 * 30 * 24 * 3600)
    def _ids_to_results(self, ids):
        # Map each plain video id onto a url_result entry handled by the
        # Youtube extractor.
            self.url_result(vid_id, 'Youtube', video_id=vid_id)
        Attempt to log in to YouTube.
        True is returned if successful or skipped.
        False is returned if login failed.

        If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
        username, password = self._get_login_info()
        # No authentication to be performed
            if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
                raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)

        login_page = self._download_webpage(
            self._LOGIN_URL, None,
            note='Downloading login page',
            errnote='unable to fetch login page', fatal=False)
        if login_page is False:

        login_form = self._hidden_inputs(login_page)

        # Helper: POST the hidden login form plus Google sign-in metadata to
        # `url`, stripping the anti-XSSI prefix before parsing the JSON reply.
        def req(url, f_req, note, errnote):
            data = login_form.copy()
                'checkConnection': 'youtube',
                'checkedDomains': 'youtube',
                'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
                'f.req': json.dumps(f_req),
                'flowName': 'GlifWebSignIn',
                'flowEntry': 'ServiceLogin',
            return self._download_json(
                url, None, note=note, errnote=errnote,
                # Drop everything before the first '[' (anti-XSSI prefix)
                transform_source=lambda s: re.sub(r'^[^[]*', '', s),
                data=urlencode_postdata(data), headers={
                    'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
                    'Google-Accounts-XSRF': 1,

            self._downloader.report_warning(message)

        # Step 1: look up the account by username.
            None, [], None, 'US', None, None, 2, False, True,
                 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
                1, [None, None, []], None, None, None, True

        lookup_results = req(
            self._LOOKUP_URL, lookup_req,
            'Looking up account info', 'Unable to look up account info')

        if lookup_results is False:

        # The account hash is at position [0][2] of the lookup response.
        user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
            warn('Unable to extract user hash')

        # Step 2: submit the password for the looked-up account hash.
            None, 1, None, [1, None, None, None, [password, None, True]],
                None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
                1, [None, None, []], None, None, None, True

        challenge_results = req(
            self._CHALLENGE_URL, challenge_req,
            'Logging in', 'Unable to log in')

        if challenge_results is False:

        login_res = try_get(challenge_results, lambda x: x[0][5], list)
            login_msg = try_get(login_res, lambda x: x[5], compat_str)
                'Unable to login: %s' % 'Invalid password'
                if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)

        res = try_get(challenge_results, lambda x: x[0][-1], list)
            warn('Unable to extract result entry')

        # Step 3: handle any additional challenge Google raises (2FA etc.).
        login_challenge = try_get(res, lambda x: x[0][0], list)
            challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
            if challenge_str == 'TWO_STEP_VERIFICATION':
                # SEND_SUCCESS - TFA code has been successfully sent to phone
                # QUOTA_EXCEEDED - reached the limit of TFA codes
                status = try_get(login_challenge, lambda x: x[5], compat_str)
                if status == 'QUOTA_EXCEEDED':
                    warn('Exceeded the limit of TFA codes, try later')

                # "TL" token needed to build the TFA submission URL.
                tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
                    warn('Unable to extract TL')

                tfa_code = self._get_tfa_info('2-step verification code')
                        'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
                        '(Note that only TOTP (Google Authenticator App) codes work at this time.)')

                # Users often paste codes prefixed with "G-"; strip it.
                tfa_code = remove_start(tfa_code, 'G-')
                    user_hash, None, 2, None,
                        9, None, None, None, None, None, None, None,
                        [None, tfa_code, True, 2]

                    self._TFA_URL.format(tl), tfa_req,
                    'Submitting TFA code', 'Unable to submit TFA code')

                if tfa_results is False:

                tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
                    tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
                        'Unable to finish TFA: %s' % 'Invalid TFA code'
                        if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)

                check_cookie_url = try_get(
                    tfa_results, lambda x: x[0][-1][2], compat_str)
                    # Human-readable explanations for the known challenge kinds.
                    'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
                    'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
                    'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
                challenge = CHALLENGES.get(
                    '%s returned error %s.' % (self.IE_NAME, challenge_str))
                warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
            check_cookie_url = try_get(res, lambda x: x[2], compat_str)

        if not check_cookie_url:
            warn('Unable to extract CheckCookie URL')

        # Step 4: fetch the CheckCookie URL to finalize the session.
        check_cookie_results = self._download_webpage(
            check_cookie_url, None, 'Checking cookie', fatal=False)

        if check_cookie_results is False:

        # A successful login redirects through myaccount.google.com.
        if 'https://myaccount.google.com/' not in check_cookie_results:
            warn('Unable to log in')
261 def _download_webpage_handle(self, *args, **kwargs):
262 kwargs.setdefault('query', {})['disable_polymer'] = 'true'
263 return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
264 *args, **compat_kwargs(kwargs))
    def _real_initialize(self):
        # One-time setup before extraction; nothing to do without a downloader.
        if self._downloader is None:
        # Attempt login (a best-effort no-op when no credentials are set).
        if not self._login():
class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
    # Extract entries from page with "Load more" button
    def _entries(self, page, playlist_id):
        # Start from the full page; later iterations swap in the AJAX
        # continuation payloads.
        more_widget_html = content_html = page
        for page_num in itertools.count(1):
            for entry in self._process_page(content_html):

            # The "Load more" widget advertises its continuation URL in a
            # data attribute.
            mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)

            more = self._download_json(
                'https://youtube.com/%s' % mobj.group('more'), playlist_id,
                'Downloading page #%s' % page_num,
                transform_source=uppercase_escape)
            content_html = more['content_html']
            if not content_html.strip():
                # Some webpages show a "Load more" button but they don't
                # actually have more videos
            more_widget_html = more['load_more_widget_html']
class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
    def _process_page(self, content):
        # Wrap every (id, title) pair scraped from the page in a url_result
        # entry pointing at the Youtube extractor.
        for vid, vid_title in self.extract_videos_from_page(content):
            yield self.url_result(vid, 'Youtube', vid, vid_title)
    def extract_videos_from_page(self, page):
        # Collect ids and titles in parallel lists, de-duplicating by id and
        # back-filling a missing title when a later match supplies one.
        for mobj in re.finditer(self._VIDEO_RE, page):
            # The link with index 0 is not the first video of the playlist (not sure if still actual)
            if 'index' in mobj.groupdict() and mobj.group('id') == '0':
            video_id = mobj.group('id')
            video_title = unescapeHTML(mobj.group('title'))
                video_title = video_title.strip()
                # Seen before: keep the first id, upgrade an empty title.
                idx = ids_in_page.index(video_id)
                if video_title and not titles_in_page[idx]:
                    titles_in_page[idx] = video_title
                ids_in_page.append(video_id)
                titles_in_page.append(video_title)
        return zip(ids_in_page, titles_in_page)
class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
    def _process_page(self, content):
        # Yield one playlist url_result per unique lockup-title link on the page.
        for playlist_id in orderedSet(re.findall(
                r'<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*><a[^>]+href="/?playlist\?list=([0-9A-Za-z-_]{10,})"',
            yield self.url_result(
                'https://www.youtube.com/playlist?list=%s' % playlist_id, 'YoutubePlaylist')
332 def _real_extract(self, url):
333 playlist_id = self._match_id(url)
334 webpage = self._download_webpage(url, playlist_id)
335 title = self._og_search_title(webpage, fatal=False)
336 return self.playlist_result(self._entries(webpage, playlist_id), playlist_id, title)
class YoutubeIE(YoutubeBaseInfoExtractor):
    """Extractor for individual youtube.com videos (watch/embed/short URLs)."""
    IE_DESC = 'YouTube.com'
    # Verbose (?x) regex; group 1 is the 11-character video id.  Playlist ids
    # matched via %(playlist_id)s are deferred to the playlist extractor.
    _VALID_URL = r"""(?x)^
                     (?:https?://|//)                                    # http(s):// or protocol-independent URL
                     (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|
                        (?:www\.)?deturl\.com/www\.youtube\.com/|
                        (?:www\.)?pwnyoutube\.com/|
                        (?:www\.)?hooktube\.com/|
                        (?:www\.)?yourepeat\.com/|
                        tube\.majestyc\.net/|
                        youtube\.googleapis\.com/)                        # the various hostnames, with wildcard subdomains
                     (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
                     (?:                                                  # the various things that can precede the ID:
                         (?:(?:v|embed|e)/(?!videoseries))                # v/ or embed/ or e/
                         |(?:                                             # or the v= param in all its forms
                             (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)?  # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
                             (?:\?|\#!?)                                  # the params delimiter ? or # or #!
                             (?:.*?[&;])??                                # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&v=V36LpHqtcDY)
                     youtu\.be|                                        # just youtu.be/xxxx
                     vid\.plus|                                        # or vid.plus/xxxx
                     zwearz\.com/watch|                                # or zwearz.com/watch/xxxx
                     |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
                     )?                                                   # all until now is optional -> you can pass the naked ID
                     ([0-9A-Za-z_-]{11})                                  # here is it! the YouTube video ID
                     %(playlist_id)s|                                  # combined list/video URLs are handled by the playlist IE
                     WL                                                # WL are handled by the watch later IE
                     (?(1).+)?                                            # if we found the ID, everything can follow
                     $""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    # Captures the next_url query parameter used by redirect-style watch URLs.
    _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
        # itag -> known format properties (presumably merged into the format
        # dicts built during extraction — TODO confirm against _real_extract).
        '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
        '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
        '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
        '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
        '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
        '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
        '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
        '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
        # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
        '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
        '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
        '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
        '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
        '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
        '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
        '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
        '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
        '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

        # 3D videos (deprioritized via negative preference)
        '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
        '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
        '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
        '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
        '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
        '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
        '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},

        # Apple HTTP Live Streaming
        '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
        '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
        '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
        '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
        '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
        '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
        '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
        '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},

        # DASH mp4 video
        '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'},  # Height can vary (https://github.com/rg3/youtube-dl/issues/4559)
        '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
        '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
        '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},

        # DASH mp4 audio
        '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
        '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
        '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
        '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
        '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
        '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
        '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},

        # DASH webm video
        '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
        '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
        '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
        '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
        '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
        '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

        # DASH webm audio
        '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
        '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},

        # Dash webm audio with opus inside
        '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
        '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
        '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},

        # RTMP (unnamed)
        '_rtmp': {'protocol': 'rtmp'},

    # Subtitle formats requested from the timedtext endpoint, in this order.
    _SUBTITLE_FORMATS = ('ttml', 'vtt')
485 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
489 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
490 'uploader': 'Philipp Hagemeister',
491 'uploader_id': 'phihag',
492 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
493 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
494 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
495 'upload_date': '20121002',
496 'license': 'Standard YouTube License',
497 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
498 'categories': ['Science & Technology'],
499 'tags': ['youtube-dl'],
502 'dislike_count': int,
508 'url': 'https://www.youtube.com/watch?v=UxxajLWwzqY',
509 'note': 'Test generic use_cipher_signature video (#897)',
513 'upload_date': '20120506',
514 'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
515 'alt_title': 'I Love It (feat. Charli XCX)',
516 'description': 'md5:f3ceb5ef83a08d95b9d146f973157cc8',
517 'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
518 'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
519 'iconic ep', 'iconic', 'love', 'it'],
521 'uploader': 'Icona Pop',
522 'uploader_id': 'IconaPop',
523 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IconaPop',
524 'license': 'Standard YouTube License',
525 'creator': 'Icona Pop',
526 'track': 'I Love It (feat. Charli XCX)',
527 'artist': 'Icona Pop',
531 'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ',
532 'note': 'Test VEVO video with age protection (#956)',
536 'upload_date': '20130703',
537 'title': 'Justin Timberlake - Tunnel Vision (Explicit)',
538 'alt_title': 'Tunnel Vision',
539 'description': 'md5:64249768eec3bc4276236606ea996373',
541 'uploader': 'justintimberlakeVEVO',
542 'uploader_id': 'justintimberlakeVEVO',
543 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/justintimberlakeVEVO',
544 'license': 'Standard YouTube License',
545 'creator': 'Justin Timberlake',
546 'track': 'Tunnel Vision',
547 'artist': 'Justin Timberlake',
552 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
553 'note': 'Embed-only video (#1746)',
557 'upload_date': '20120608',
558 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
559 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
560 'uploader': 'SET India',
561 'uploader_id': 'setindia',
562 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
563 'license': 'Standard YouTube License',
568 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=UxxajLWwzqY',
569 'note': 'Use the first video ID in the URL',
573 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
574 'uploader': 'Philipp Hagemeister',
575 'uploader_id': 'phihag',
576 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
577 'upload_date': '20121002',
578 'license': 'Standard YouTube License',
579 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
580 'categories': ['Science & Technology'],
581 'tags': ['youtube-dl'],
584 'dislike_count': int,
587 'skip_download': True,
591 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
592 'note': '256k DASH audio (format 141) via DASH manifest',
596 'upload_date': '20121002',
597 'uploader_id': '8KVIDEO',
598 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
600 'uploader': '8KVIDEO',
601 'license': 'Standard YouTube License',
602 'title': 'UHDTV TEST 8K VIDEO.mp4'
605 'youtube_include_dash_manifest': True,
608 'skip': 'format 141 not served anymore',
610 # DASH manifest with encrypted signature
612 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
616 'title': 'Afrojack, Spree Wilson - The Spark ft. Spree Wilson',
617 'description': 'md5:1900ed86ee514927b9e00fbead6969a5',
619 'uploader': 'AfrojackVEVO',
620 'uploader_id': 'AfrojackVEVO',
621 'upload_date': '20131011',
622 'license': 'Standard YouTube License',
625 'youtube_include_dash_manifest': True,
626 'format': '141/bestaudio[ext=m4a]',
629 # JS player signature function name containing $
631 'url': 'https://www.youtube.com/watch?v=nfWlot6h_JM',
635 'title': 'Taylor Swift - Shake It Off',
636 'alt_title': 'Shake It Off',
637 'description': 'md5:95f66187cd7c8b2c13eb78e1223b63c3',
639 'uploader': 'TaylorSwiftVEVO',
640 'uploader_id': 'TaylorSwiftVEVO',
641 'upload_date': '20140818',
642 'license': 'Standard YouTube License',
643 'creator': 'Taylor Swift',
646 'youtube_include_dash_manifest': True,
647 'format': '141/bestaudio[ext=m4a]',
652 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
657 'upload_date': '20100909',
658 'uploader': 'TJ Kirk',
659 'uploader_id': 'TheAmazingAtheist',
660 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
661 'license': 'Standard YouTube License',
662 'title': 'Burning Everyone\'s Koran',
663 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
666 # Normal age-gate video (No vevo, embed allowed)
668 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
672 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
673 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
675 'uploader': 'The Witcher',
676 'uploader_id': 'WitcherGame',
677 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
678 'upload_date': '20140605',
679 'license': 'Standard YouTube License',
683 # Age-gate video with encrypted signature
685 'url': 'https://www.youtube.com/watch?v=6kLq3WMV1nU',
689 'title': 'Dedication To My Ex (Miss That) (Lyric Video)',
690 'description': 'md5:33765bb339e1b47e7e72b5490139bb41',
692 'uploader': 'LloydVEVO',
693 'uploader_id': 'LloydVEVO',
694 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/LloydVEVO',
695 'upload_date': '20110629',
696 'license': 'Standard YouTube License',
700 # video_info is None (https://github.com/rg3/youtube-dl/issues/4421)
701 # YouTube Red ad is not captured for creator
703 'url': '__2ABJjxzNo',
708 'upload_date': '20100430',
709 'uploader_id': 'deadmau5',
710 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
711 'creator': 'deadmau5',
712 'description': 'md5:12c56784b8032162bb936a5f76d55360',
713 'uploader': 'deadmau5',
714 'license': 'Standard YouTube License',
715 'title': 'Deadmau5 - Some Chords (HD)',
716 'alt_title': 'Some Chords',
718 'expected_warnings': [
719 'DASH manifest missing',
722 # Olympics (https://github.com/rg3/youtube-dl/issues/4431)
724 'url': 'lqQg6PlCWgI',
729 'upload_date': '20150827',
730 'uploader_id': 'olympic',
731 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
732 'license': 'Standard YouTube License',
733 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
734 'uploader': 'Olympic',
735 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
738 'skip_download': 'requires avconv',
743 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
747 'stretched_ratio': 16 / 9.,
749 'upload_date': '20110310',
750 'uploader_id': 'AllenMeow',
751 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
752 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
754 'license': 'Standard YouTube License',
755 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
758 # url_encoded_fmt_stream_map is empty string
760 'url': 'qEJwOuvDf7I',
764 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
766 'upload_date': '20150404',
767 'uploader_id': 'spbelect',
768 'uploader': 'Наблюдатели Петербурга',
771 'skip_download': 'requires avconv',
773 'skip': 'This live event has ended.',
775 # Extraction from multiple DASH manifests (https://github.com/rg3/youtube-dl/pull/6097)
777 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
781 'title': 'md5:7b81415841e02ecd4313668cde88737a',
782 'description': 'md5:116377fd2963b81ec4ce64b542173306',
784 'upload_date': '20150625',
785 'uploader_id': 'dorappi2000',
786 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
787 'uploader': 'dorappi2000',
788 'license': 'Standard YouTube License',
789 'formats': 'mincount:31',
791 'skip': 'not actual anymore',
793 # DASH manifest with segment_list
795 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
796 'md5': '8ce563a1d667b599d21064e982ab9e31',
800 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
801 'uploader': 'Airtek',
802 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
803 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
804 'license': 'Standard YouTube License',
805 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
808 'youtube_include_dash_manifest': True,
809 'format': '135', # bestvideo
811 'skip': 'This live event has ended.',
814 # Multifeed videos (multiple cameras), URL is for Main Camera
815 'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
818 'title': 'teamPGP: Rocket League Noob Stream',
819 'description': 'md5:dc7872fb300e143831327f1bae3af010',
825 'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
826 'description': 'md5:dc7872fb300e143831327f1bae3af010',
828 'upload_date': '20150721',
829 'uploader': 'Beer Games Beer',
830 'uploader_id': 'beergamesbeer',
831 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
832 'license': 'Standard YouTube License',
838 'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
839 'description': 'md5:dc7872fb300e143831327f1bae3af010',
841 'upload_date': '20150721',
842 'uploader': 'Beer Games Beer',
843 'uploader_id': 'beergamesbeer',
844 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
845 'license': 'Standard YouTube License',
851 'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
852 'description': 'md5:dc7872fb300e143831327f1bae3af010',
854 'upload_date': '20150721',
855 'uploader': 'Beer Games Beer',
856 'uploader_id': 'beergamesbeer',
857 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
858 'license': 'Standard YouTube License',
864 'title': 'teamPGP: Rocket League Noob Stream (zim)',
865 'description': 'md5:dc7872fb300e143831327f1bae3af010',
867 'upload_date': '20150721',
868 'uploader': 'Beer Games Beer',
869 'uploader_id': 'beergamesbeer',
870 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
871 'license': 'Standard YouTube License',
875 'skip_download': True,
879 # Multifeed video with comma in title (see https://github.com/rg3/youtube-dl/issues/8536)
880 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
883 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
886 'skip': 'Not multifeed anymore',
889 'url': 'https://vid.plus/FlRa-iH7PGw',
890 'only_matching': True,
893 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
894 'only_matching': True,
897 # Title with JS-like syntax "};" (see https://github.com/rg3/youtube-dl/issues/7468)
898 # Also tests cut-off URL expansion in video description (see
899 # https://github.com/rg3/youtube-dl/issues/1892,
900 # https://github.com/rg3/youtube-dl/issues/8164)
901 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
905 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
906 'alt_title': 'Dark Walk - Position Music',
907 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
909 'upload_date': '20151119',
910 'uploader_id': 'IronSoulElf',
911 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
912 'uploader': 'IronSoulElf',
913 'license': 'Standard YouTube License',
914 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
915 'track': 'Dark Walk - Position Music',
916 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
919 'skip_download': True,
923 # Tags with '};' (see https://github.com/rg3/youtube-dl/issues/7468)
924 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
925 'only_matching': True,
928 # Video with yt:stretch=17:0
929 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
933 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
934 'description': 'md5:ee18a25c350637c8faff806845bddee9',
935 'upload_date': '20151107',
936 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
937 'uploader': 'CH GAMER DROID',
940 'skip_download': True,
942 'skip': 'This video does not exist.',
945 # Video licensed under Creative Commons
946 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
950 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
951 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
953 'upload_date': '20150127',
954 'uploader_id': 'BerkmanCenter',
955 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
956 'uploader': 'The Berkman Klein Center for Internet & Society',
957 'license': 'Creative Commons Attribution license (reuse allowed)',
960 'skip_download': True,
964 # Channel-like uploader_url
965 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
969 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
970 'description': 'md5:dda0d780d5a6e120758d1711d062a867',
972 'upload_date': '20151119',
973 'uploader': 'Bernie Sanders',
974 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
975 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
976 'license': 'Creative Commons Attribution license (reuse allowed)',
979 'skip_download': True,
983 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;v=V36LpHqtcDY',
984 'only_matching': True,
987 # YouTube Red paid video (https://github.com/rg3/youtube-dl/issues/10059)
988 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
989 'only_matching': True,
992 # Rental video preview
993 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
997 'title': 'Piku - Trailer',
998 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
999 'upload_date': '20150811',
1000 'uploader': 'FlixMatrix',
1001 'uploader_id': 'FlixMatrixKaravan',
1002 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
1003 'license': 'Standard YouTube License',
1006 'skip_download': True,
1008 'skip': 'This video is not available.',
1011 # YouTube Red video with episode data
1012 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1014 'id': 'iqKdEhx-dD4',
1016 'title': 'Isolation - Mind Field (Ep 1)',
1017 'description': 'md5:25b78d2f64ae81719f5c96319889b736',
1019 'upload_date': '20170118',
1020 'uploader': 'Vsauce',
1021 'uploader_id': 'Vsauce',
1022 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
1023 'license': 'Standard YouTube License',
1024 'series': 'Mind Field',
1026 'episode_number': 1,
1029 'skip_download': True,
1031 'expected_warnings': [
1032 'Skipping DASH manifest',
1036 # The following content has been identified by the YouTube community
1037 # as inappropriate or offensive to some audiences.
1038 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1040 'id': '6SJNVb0GnPI',
1042 'title': 'Race Differences in Intelligence',
1043 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1045 'upload_date': '20140124',
1046 'uploader': 'New Century Foundation',
1047 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1048 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1049 'license': 'Standard YouTube License',
1052 'skip_download': True,
1057 'url': '1t24XAntNCY',
1058 'only_matching': True,
1061 # geo restricted to JP
1062 'url': 'sJL6WA-aGkQ',
1063 'only_matching': True,
1066 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
1067 'only_matching': True,
1071 def __init__(self, *args, **kwargs):
1072 super(YoutubeIE, self).__init__(*args, **kwargs)
1073 self._player_cache = {}
1075 def report_video_info_webpage_download(self, video_id):
1076 """Report attempt to download video info webpage."""
1077 self.to_screen('%s: Downloading video info webpage' % video_id)
1079 def report_information_extraction(self, video_id):
1080 """Report attempt to extract video information."""
1081 self.to_screen('%s: Extracting video information' % video_id)
1083 def report_unavailable_format(self, video_id, format):
1084 """Report extracted video URL."""
1085 self.to_screen('%s: Format %s not available' % (video_id, format))
1087 def report_rtmp_download(self):
1088 """Indicate the download will use the RTMP protocol."""
1089 self.to_screen('RTMP download detected')
1091 def _signature_cache_id(self, example_sig):
1092 """ Return a string representation of a signature """
1093 return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
    def _extract_signature_function(self, video_id, player_url, example_sig):
        """Build (and cache on disk) the signature-decryption function for
        player_url.

        The player type ('js' or 'swf') and id are parsed out of the player
        URL, the player code is downloaded and interpreted, and the resulting
        function is stored in the filesystem cache as a list of character
        indices derived from a probe run.
        """
        r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|(?:/[a-z]{2}_[A-Z]{2})?/base)?\.(?P<ext>[a-z]+)$',
            raise ExtractorError('Cannot identify player %r' % player_url)
        player_type = id_m.group('ext')
        player_id = id_m.group('id')

        # Read from filesystem cache
        func_id = '%s_%s_%s' % (
            player_type, player_id, self._signature_cache_id(example_sig))
        # Cache ids must be plain file names (no path separators).
        assert os.path.basename(func_id) == func_id

        cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
        if cache_spec is not None:
            # Cached spec is a list of indices into the scrambled signature.
            return lambda s: ''.join(s[i] for i in cache_spec)

            'Downloading player %s' % player_url
            if self._downloader.params.get('verbose') else
            'Downloading %s player %s' % (player_type, player_id)
        if player_type == 'js':
            code = self._download_webpage(
                player_url, video_id,
                errnote='Download of %s failed' % player_url)
            res = self._parse_sig_js(code)
        elif player_type == 'swf':
            urlh = self._request_webpage(
                player_url, video_id,
                errnote='Download of %s failed' % player_url)
            res = self._parse_sig_swf(code)
            assert False, 'Invalid player type %r' % player_type

        # Probe the extracted function with a known string to derive the
        # character-index spec that is written to the cache.
        test_string = ''.join(map(compat_chr, range(len(example_sig))))
        cache_res = res(test_string)
        cache_spec = [ord(c) for c in cache_res]

        self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
    def _print_sig_code(self, func, example_sig):
        """Print Python source code equivalent to the extracted signature
        function (shown when --youtube-print-sig-code is given)."""
        def gen_sig_code(idxs):
            def _genslice(start, end, step):
                # Render s[start:end+step:step], omitting redundant parts.
                starts = '' if start == 0 else str(start)
                ends = (':%d' % (end + step)) if end + step >= 0 else ':'
                steps = '' if step == 1 else (':%d' % step)
                return 's[%s%s%s]' % (starts, ends, steps)

            # Quelch pyflakes warnings - start will be set when step is set
            start = '(Never used)'
            # Walk consecutive index pairs, emitting compact slice
            # expressions for runs with a constant stride.
            for i, prev in zip(idxs[1:], idxs[:-1]):
                if step is not None:
                    if i - prev == step:
                    yield _genslice(start, prev, step)
                if i - prev in [-1, 1]:
                yield 's[%d]' % prev
                yield _genslice(start, i, step)

        # Derive the index permutation by probing func with a known string.
        test_string = ''.join(map(compat_chr, range(len(example_sig))))
        cache_res = func(test_string)
        cache_spec = [ord(c) for c in cache_res]
        expr_code = ' + '.join(gen_sig_code(cache_spec))
        signature_id_tuple = '(%s)' % (
            ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
        code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
                ' return %s\n') % (signature_id_tuple, expr_code)
        self.to_screen('Extracted signature function:\n' + code)
1180 def _parse_sig_js(self, jscode):
1181 funcname = self._search_regex(
1182 (r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1183 r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
1184 r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*c\s*&&\s*d\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1185 r'\bc\s*&&\s*d\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\('),
1186 jscode, 'Initial JS player signature function name', group='sig')
1188 jsi = JSInterpreter(jscode)
1189 initial_function = jsi.extract_function(funcname)
1190 return lambda s: initial_function([s])
1192 def _parse_sig_swf(self, file_contents):
1193 swfi = SWFInterpreter(file_contents)
1194 TARGET_CLASSNAME = 'SignatureDecipher'
1195 searched_class = swfi.extract_class(TARGET_CLASSNAME)
1196 initial_function = swfi.extract_function(searched_class, 'decipher')
1197 return lambda s: initial_function([s])
    def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
        """Turn the encrypted s field into a working signature"""

        if player_url is None:
            raise ExtractorError('Cannot decrypt signature without player_url')

        # Normalize scheme-relative and site-relative player URLs.
        if player_url.startswith('//'):
            player_url = 'https:' + player_url
        elif not re.match(r'https?://', player_url):
            player_url = compat_urlparse.urljoin(
                'https://www.youtube.com', player_url)

            # Signature functions are cached per (player URL, signature
            # shape) so the player is only downloaded/interpreted once.
            player_id = (player_url, self._signature_cache_id(s))
            if player_id not in self._player_cache:
                func = self._extract_signature_function(
                    video_id, player_url, s
                self._player_cache[player_id] = func
            func = self._player_cache[player_id]
            if self._downloader.params.get('youtube_print_sig_code'):
                self._print_sig_code(func, s)
        except Exception as e:
            # Include the full traceback in the error to aid debugging.
            tb = traceback.format_exc()
            raise ExtractorError(
                'Signature extraction failed: ' + tb, cause=e)
    def _get_subtitles(self, video_id, webpage):
        """Fetch the list of manually-created subtitles via the timedtext
        API and return a dict mapping language code to a list of subtitle
        format dicts; warns instead of failing when none are available."""
            subs_doc = self._download_xml(
                'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
                video_id, note=False)
        except ExtractorError as err:
            # Subtitles are optional: warn instead of aborting extraction.
            self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err))

        for track in subs_doc.findall('track'):
            lang = track.attrib['lang_code']
            if lang in sub_lang_list:
                # Build one entry per supported subtitle format for this track.
                for ext in self._SUBTITLE_FORMATS:
                    params = compat_urllib_parse_urlencode({
                        'name': track.attrib['name'].encode('utf-8'),
                    sub_formats.append({
                        'url': 'https://www.youtube.com/api/timedtext?' + params,
            sub_lang_list[lang] = sub_formats
        if not sub_lang_list:
            self._downloader.report_warning('video doesn\'t have subtitles')
        return sub_lang_list
    def _get_ytplayer_config(self, video_id, webpage):
        """Extract and parse the ytplayer.config JSON blob from a watch
        page; returns the parsed dict, or None when it cannot be found."""
            # User data may contain arbitrary character sequences that may affect
            # JSON extraction with regex, e.g. when '};' is contained the second
            # regex won't capture the whole JSON. Yet working around by trying more
            # concrete regex first keeping in mind proper quoted string handling
            # to be implemented in future that will replace this workaround (see
            # https://github.com/rg3/youtube-dl/issues/7468,
            # https://github.com/rg3/youtube-dl/pull/7599)
            r';ytplayer\.config\s*=\s*({.+?});ytplayer',
            r';ytplayer\.config\s*=\s*({.+?});',
        config = self._search_regex(
            patterns, webpage, 'ytplayer.config', default=None)
            # uppercase_escape fixes \U-style escapes before JSON parsing.
            return self._parse_json(
                uppercase_escape(config), video_id, fatal=False)
    def _get_automatic_captions(self, video_id, webpage):
        """We need the webpage for getting the captions url, pass it as an
        argument to speed up the process."""
        self.to_screen('%s: Looking for automatic captions' % video_id)
        player_config = self._get_ytplayer_config(video_id, webpage)
        err_msg = 'Couldn\'t find automatic captions for %s' % video_id
        if not player_config:
            self._downloader.report_warning(err_msg)
            args = player_config['args']
            caption_url = args.get('ttsurl')
            # Legacy flow: a direct ttsurl (plus timestamp) is provided.
                timestamp = args['timestamp']
                # We get the available subtitles
                list_params = compat_urllib_parse_urlencode({
                list_url = caption_url + '&' + list_params
                caption_list = self._download_xml(list_url, video_id)
                original_lang_node = caption_list.find('track')
                if original_lang_node is None:
                    self._downloader.report_warning('Video doesn\'t have automatic captions')
                original_lang = original_lang_node.attrib['lang_code']
                caption_kind = original_lang_node.attrib.get('kind', '')

                # One entry per target language, one format dict per
                # supported subtitle format.
                for lang_node in caption_list.findall('target'):
                    sub_lang = lang_node.attrib['lang_code']
                    for ext in self._SUBTITLE_FORMATS:
                        params = compat_urllib_parse_urlencode({
                            'lang': original_lang,
                            'kind': caption_kind,
                        sub_formats.append({
                            'url': caption_url + '&' + params,
                    sub_lang_list[sub_lang] = sub_formats

            def make_captions(sub_url, sub_langs):
                # Build a {lang: formats} dict by rewriting sub_url's query
                # string for every requested target language.
                parsed_sub_url = compat_urllib_parse_urlparse(sub_url)
                caption_qs = compat_parse_qs(parsed_sub_url.query)
                for sub_lang in sub_langs:
                    for ext in self._SUBTITLE_FORMATS:
                            'tlang': [sub_lang],
                        sub_url = compat_urlparse.urlunparse(parsed_sub_url._replace(
                            query=compat_urllib_parse_urlencode(caption_qs, True)))
                        sub_formats.append({
                    captions[sub_lang] = sub_formats

            # New captions format as of 22.06.2017
            player_response = args.get('player_response')
            if player_response and isinstance(player_response, compat_str):
                player_response = self._parse_json(
                    player_response, video_id, fatal=False)
                    renderer = player_response['captions']['playerCaptionsTracklistRenderer']
                    base_url = renderer['captionTracks'][0]['baseUrl']
                    for lang in renderer['translationLanguages']:
                        lang_code = lang.get('languageCode')
                            sub_lang_list.append(lang_code)
                    return make_captions(base_url, sub_lang_list)

            # Some videos don't provide ttsurl but rather caption_tracks and
            # caption_translation_languages (e.g. 20LmZk1hakA)
            # Not used anymore as of 22.06.2017
            caption_tracks = args['caption_tracks']
            caption_translation_languages = args['caption_translation_languages']
            caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
            for lang in caption_translation_languages.split(','):
                lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang))
                sub_lang = lang_qs.get('lc', [None])[0]
                    sub_lang_list.append(sub_lang)
            return make_captions(caption_url, sub_lang_list)
        # An extractor error can be raised by the download process if there
        # are no automatic captions but there are subtitles
        except (KeyError, IndexError, ExtractorError):
            self._downloader.report_warning(err_msg)
    def _mark_watched(self, video_id, video_info):
        """Ping the playback-stats URL so YouTube records the video as
        watched; best-effort (the request is non-fatal)."""
        playback_url = video_info.get('videostats_playback_base_url', [None])[0]
        if not playback_url:
        parsed_playback_url = compat_urlparse.urlparse(playback_url)
        qs = compat_urlparse.parse_qs(parsed_playback_url.query)

        # cpn generation algorithm is reverse engineered from base.js.
        # In fact it works even with dummy cpn.
        CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
        cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))

        playback_url = compat_urlparse.urlunparse(
            parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))

        self._download_webpage(
            playback_url, video_id, 'Marking watched',
            'Unable to mark watched', fatal=False)
    def _extract_urls(webpage):
        """Collect YouTube embed URLs/video ids found in an arbitrary
        third-party webpage."""
        # Embedded YouTube player
            unescapeHTML(mobj.group('url'))
            for mobj in re.finditer(r'''(?x)
                     (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
                     (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)

        # lazyYT YouTube embed
        entries.extend(list(map(
            re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))

        # Wordpress "YouTube Video Importer" plugin
        matches = re.findall(r'''(?x)<div[^>]+
            class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
            data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
        # The video id is the last group of each match tuple.
        entries.extend(m[-1] for m in matches)
1434 def _extract_url(webpage):
1435 urls = YoutubeIE._extract_urls(webpage)
1436 return urls[0] if urls else None
    def extract_id(cls, url):
        """Extract the video id from url via cls._VALID_URL; raises
        ExtractorError for URLs that do not match."""
        mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
            raise ExtractorError('Invalid URL: %s' % url)
        # Group 2 of _VALID_URL captures the video id.
        video_id = mobj.group(2)
1446 def _extract_annotations(self, video_id):
1447 url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id
1448 return self._download_webpage(url, video_id, note='Searching for annotations.', errnote='Unable to download video annotations.')
    def _extract_chapters(description, duration):
        """Build chapter entries ({start_time, end_time, title}) from seek
        links with timestamps embedded in a video description."""
        chapter_lines = re.findall(
            r'(?:^|<br\s*/>)([^<]*<a[^>]+onclick=["\']yt\.www\.watch\.player\.seekTo[^>]+>(\d{1,2}:\d{1,2}(?::\d{1,2})?)</a>[^>]*)(?=$|<br\s*/>)',
        if not chapter_lines:
        for next_num, (chapter_line, time_point) in enumerate(
                chapter_lines, start=1):
            start_time = parse_duration(time_point)
            if start_time is None:
            # Timestamps beyond the video duration are bogus.
            if start_time > duration:
            # The last chapter ends at the video duration; otherwise at the
            # start of the next chapter (next_num indexes the next line
            # because enumerate starts at 1).
            end_time = (duration if next_num == len(chapter_lines)
                        else parse_duration(chapter_lines[next_num][1]))
            if end_time is None:
            if end_time > duration:
            if start_time > end_time:
            # Strip the seek-link markup, leaving only the title text.
            chapter_title = re.sub(
                r'<a[^>]+>[^<]+</a>', '', chapter_line).strip(' \t-')
            chapter_title = re.sub(r'\s+', ' ', chapter_title)
                'start_time': start_time,
                'end_time': end_time,
                'title': chapter_title,
1485 def _real_extract(self, url):
1486 url, smuggled_data = unsmuggle_url(url, {})
1489 'http' if self._downloader.params.get('prefer_insecure', False)
1494 parsed_url = compat_urllib_parse_urlparse(url)
1495 for component in [parsed_url.fragment, parsed_url.query]:
1496 query = compat_parse_qs(component)
1497 if start_time is None and 't' in query:
1498 start_time = parse_duration(query['t'][0])
1499 if start_time is None and 'start' in query:
1500 start_time = parse_duration(query['start'][0])
1501 if end_time is None and 'end' in query:
1502 end_time = parse_duration(query['end'][0])
1504 # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1505 mobj = re.search(self._NEXT_URL_RE, url)
1507 url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
1508 video_id = self.extract_id(url)
1511 url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
1512 video_webpage = self._download_webpage(url, video_id)
1514 # Attempt to extract SWF player URL
1515 mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
1516 if mobj is not None:
1517 player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1523 def add_dash_mpd(video_info):
1524 dash_mpd = video_info.get('dashmpd')
1525 if dash_mpd and dash_mpd[0] not in dash_mpds:
1526 dash_mpds.append(dash_mpd[0])
1531 def extract_view_count(v_info):
1532 return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
1535 embed_webpage = None
1536 if re.search(r'player-age-gate-content">', video_webpage) is not None:
1538 # We simulate the access to the video from www.youtube.com/v/{video_id}
1539 # this can be viewed without login into Youtube
1540 url = proto + '://www.youtube.com/embed/%s' % video_id
1541 embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
1542 data = compat_urllib_parse_urlencode({
1543 'video_id': video_id,
1544 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1545 'sts': self._search_regex(
1546 r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
1548 video_info_url = proto + '://www.youtube.com/get_video_info?' + data
1549 video_info_webpage = self._download_webpage(
1550 video_info_url, video_id,
1551 note='Refetching age-gated info webpage',
1552 errnote='unable to download video info webpage')
1553 video_info = compat_parse_qs(video_info_webpage)
1554 add_dash_mpd(video_info)
1559 # Try looking directly into the video webpage
1560 ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
1562 args = ytplayer_config['args']
1563 if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
1564 # Convert to the same format returned by compat_parse_qs
1565 video_info = dict((k, [v]) for k, v in args.items())
1566 add_dash_mpd(video_info)
1567 # Rental video is not rented but preview is available (e.g.
1568 # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
1569 # https://github.com/rg3/youtube-dl/issues/10532)
1570 if not video_info and args.get('ypc_vid'):
1571 return self.url_result(
1572 args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
1573 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1575 sts = ytplayer_config.get('sts')
1576 if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
1577 # We also try looking in get_video_info since it may contain different dashmpd
1578 # URL that points to a DASH manifest with possibly different itag set (some itags
1579 # are missing from DASH manifest pointed by webpage's dashmpd, some - from DASH
1580 # manifest pointed by get_video_info's dashmpd).
1581 # The general idea is to take a union of itags of both DASH manifests (for example
1582 # video with such 'manifest behavior' see https://github.com/rg3/youtube-dl/issues/6093)
1583 self.report_video_info_webpage_download(video_id)
1584 for el in ('info', 'embedded', 'detailpage', 'vevo', ''):
1586 'video_id': video_id,
1596 video_info_webpage = self._download_webpage(
1597 '%s://www.youtube.com/get_video_info' % proto,
1598 video_id, note=False,
1599 errnote='unable to download video info webpage',
1600 fatal=False, query=query)
1601 if not video_info_webpage:
1603 get_video_info = compat_parse_qs(video_info_webpage)
1604 add_dash_mpd(get_video_info)
1605 if view_count is None:
1606 view_count = extract_view_count(get_video_info)
1608 video_info = get_video_info
1609 if 'token' in get_video_info:
1610 # Different get_video_info requests may report different results, e.g.
1611 # some may report video unavailability, but some may serve it without
1612 # any complaint (see https://github.com/rg3/youtube-dl/issues/7362,
1613 # the original webpage as well as el=info and el=embedded get_video_info
1614 # requests report video unavailability due to geo restriction while
1615 # el=detailpage succeeds and returns valid data). This is probably
1616 # due to YouTube measures against IP ranges of hosting providers.
1617 # Working around by preferring the first succeeded video_info containing
1618 # the token if no such video_info yet was found.
1619 if 'token' not in video_info:
1620 video_info = get_video_info
1623 def extract_unavailable_message():
1624 return self._html_search_regex(
1625 r'(?s)<h1[^>]+id="unavailable-message"[^>]*>(.+?)</h1>',
1626 video_webpage, 'unavailable message', default=None)
1628 if 'token' not in video_info:
1629 if 'reason' in video_info:
1630 if 'The uploader has not made this video available in your country.' in video_info['reason']:
1631 regions_allowed = self._html_search_meta(
1632 'regionsAllowed', video_webpage, default=None)
1633 countries = regions_allowed.split(',') if regions_allowed else None
1634 self.raise_geo_restricted(
1635 msg=video_info['reason'][0], countries=countries)
1636 reason = video_info['reason'][0]
1637 if 'Invalid parameters' in reason:
1638 unavailable_message = extract_unavailable_message()
1639 if unavailable_message:
1640 reason = unavailable_message
1641 raise ExtractorError(
1642 'YouTube said: %s' % reason,
1643 expected=True, video_id=video_id)
1645 raise ExtractorError(
1646 '"token" parameter not in video info for unknown reason',
1650 if 'title' in video_info:
1651 video_title = video_info['title'][0]
1653 self._downloader.report_warning('Unable to extract video title')
1657 description_original = video_description = get_element_by_id("eow-description", video_webpage)
1658 if video_description:
1661 redir_url = compat_urlparse.urljoin(url, m.group(1))
1662 parsed_redir_url = compat_urllib_parse_urlparse(redir_url)
1663 if re.search(r'^(?:www\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)$', parsed_redir_url.netloc) and parsed_redir_url.path == '/redirect':
1664 qs = compat_parse_qs(parsed_redir_url.query)
1670 description_original = video_description = re.sub(r'''(?x)
1672 (?:[a-zA-Z-]+="[^"]*"\s+)*?
1673 (?:title|href)="([^"]+)"\s+
1674 (?:[a-zA-Z-]+="[^"]*"\s+)*?
1678 ''', replace_url, video_description)
1679 video_description = clean_html(video_description)
1681 fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
1683 video_description = unescapeHTML(fd_mobj.group(1))
1685 video_description = ''
1687 if 'multifeed_metadata_list' in video_info and not smuggled_data.get('force_singlefeed', False):
1688 if not self._downloader.params.get('noplaylist'):
1691 multifeed_metadata_list = video_info['multifeed_metadata_list'][0]
1692 for feed in multifeed_metadata_list.split(','):
1693 # Unquote should take place before split on comma (,) since textual
1694 # fields may contain comma as well (see
1695 # https://github.com/rg3/youtube-dl/issues/8536)
1696 feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
1698 '_type': 'url_transparent',
1699 'ie_key': 'Youtube',
1701 '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
1702 {'force_singlefeed': True}),
1703 'title': '%s (%s)' % (video_title, feed_data['title'][0]),
1705 feed_ids.append(feed_data['id'][0])
1707 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
1708 % (', '.join(feed_ids), video_id))
1709 return self.playlist_result(entries, video_id, video_title, video_description)
1710 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
1712 if view_count is None:
1713 view_count = extract_view_count(video_info)
1715 # Check for "rental" videos
1716 if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
1717 raise ExtractorError('"rental" videos not supported. See https://github.com/rg3/youtube-dl/issues/359 for more information.', expected=True)
1719 def _extract_filesize(media_url):
1720 return int_or_none(self._search_regex(
1721 r'\bclen[=/](\d+)', media_url, 'filesize', default=None))
1723 if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1724 self.report_rtmp_download()
1726 'format_id': '_rtmp',
1728 'url': video_info['conn'][0],
1729 'player_url': player_url,
1731 elif not is_live and (len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1):
1732 encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
1733 if 'rtmpe%3Dyes' in encoded_url_map:
1734 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
1736 fmt_list = video_info.get('fmt_list', [''])[0]
1738 for fmt in fmt_list.split(','):
1739 spec = fmt.split('/')
1741 width_height = spec[1].split('x')
1742 if len(width_height) == 2:
1743 formats_spec[spec[0]] = {
1744 'resolution': spec[1],
1745 'width': int_or_none(width_height[0]),
1746 'height': int_or_none(width_height[1]),
1748 q = qualities(['small', 'medium', 'hd720'])
1750 for url_data_str in encoded_url_map.split(','):
1751 url_data = compat_parse_qs(url_data_str)
1752 if 'itag' not in url_data or 'url' not in url_data:
1754 format_id = url_data['itag'][0]
1755 url = url_data['url'][0]
1757 if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
1758 ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
1759 jsplayer_url_json = self._search_regex(
1761 embed_webpage if age_gate else video_webpage,
1762 'JS player URL (1)', default=None)
1763 if not jsplayer_url_json and not age_gate:
1764 # We need the embed website after all
1765 if embed_webpage is None:
1766 embed_url = proto + '://www.youtube.com/embed/%s' % video_id
1767 embed_webpage = self._download_webpage(
1768 embed_url, video_id, 'Downloading embed webpage')
1769 jsplayer_url_json = self._search_regex(
1770 ASSETS_RE, embed_webpage, 'JS player URL')
1772 player_url = json.loads(jsplayer_url_json)
1773 if player_url is None:
1774 player_url_json = self._search_regex(
1775 r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
1776 video_webpage, 'age gate player URL')
1777 player_url = json.loads(player_url_json)
1779 if 'sig' in url_data:
1780 url += '&signature=' + url_data['sig'][0]
1781 elif 's' in url_data:
1782 encrypted_sig = url_data['s'][0]
1784 if self._downloader.params.get('verbose'):
1785 if player_url is None:
1786 player_version = 'unknown'
1787 player_desc = 'unknown'
1789 if player_url.endswith('swf'):
1790 player_version = self._search_regex(
1791 r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
1792 'flash player', fatal=False)
1793 player_desc = 'flash player %s' % player_version
1795 player_version = self._search_regex(
1796 [r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js',
1797 r'(?:www|player)-([^/]+)(?:/[a-z]{2}_[A-Z]{2})?/base\.js'],
1799 'html5 player', fatal=False)
1800 player_desc = 'html5 player %s' % player_version
1802 parts_sizes = self._signature_cache_id(encrypted_sig)
1803 self.to_screen('{%s} signature length %s, %s' %
1804 (format_id, parts_sizes, player_desc))
1806 signature = self._decrypt_signature(
1807 encrypted_sig, video_id, player_url, age_gate)
1808 url += '&signature=' + signature
1809 if 'ratebypass' not in url:
1810 url += '&ratebypass=yes'
1813 'format_id': format_id,
1815 'player_url': player_url,
1817 if format_id in self._formats:
1818 dct.update(self._formats[format_id])
1819 if format_id in formats_spec:
1820 dct.update(formats_spec[format_id])
1822 # Some itags are not included in DASH manifest thus corresponding formats will
1823 # lack metadata (see https://github.com/rg3/youtube-dl/pull/5993).
1824 # Trying to extract metadata from url_encoded_fmt_stream_map entry.
1825 mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
1826 width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
1828 filesize = int_or_none(url_data.get(
1829 'clen', [None])[0]) or _extract_filesize(url)
1831 quality = url_data.get('quality_label', [None])[0] or url_data.get('quality', [None])[0]
1834 'filesize': filesize,
1835 'tbr': float_or_none(url_data.get('bitrate', [None])[0], 1000),
1838 'fps': int_or_none(url_data.get('fps', [None])[0]),
1839 'format_note': quality,
1840 'quality': q(quality),
1842 for key, value in more_fields.items():
1845 type_ = url_data.get('type', [None])[0]
1847 type_split = type_.split(';')
1848 kind_ext = type_split[0].split('/')
1849 if len(kind_ext) == 2:
1851 dct['ext'] = mimetype2ext(type_split[0])
1852 if kind in ('audio', 'video'):
1854 for mobj in re.finditer(
1855 r'(?P<key>[a-zA-Z_-]+)=(?P<quote>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):
1856 if mobj.group('key') == 'codecs':
1857 codecs = mobj.group('val')
1860 dct.update(parse_codecs(codecs))
1861 if dct.get('acodec') == 'none' or dct.get('vcodec') == 'none':
1862 dct['downloader_options'] = {
1863 # Youtube throttles chunks >~10M
1864 'http_chunk_size': 10485760,
1867 elif video_info.get('hlsvp'):
1868 manifest_url = video_info['hlsvp'][0]
1870 m3u8_formats = self._extract_m3u8_formats(
1871 manifest_url, video_id, 'mp4', fatal=False)
1872 for a_format in m3u8_formats:
1873 itag = self._search_regex(
1874 r'/itag/(\d+)/', a_format['url'], 'itag', default=None)
1876 a_format['format_id'] = itag
1877 if itag in self._formats:
1878 dct = self._formats[itag].copy()
1879 dct.update(a_format)
1881 a_format['player_url'] = player_url
1882 # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
1883 a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
1884 formats.append(a_format)
1886 error_message = clean_html(video_info.get('reason', [None])[0])
1887 if not error_message:
1888 error_message = extract_unavailable_message()
1890 raise ExtractorError(error_message, expected=True)
1891 raise ExtractorError('no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
1894 video_uploader = try_get(video_info, lambda x: x['author'][0], compat_str)
1896 video_uploader = compat_urllib_parse_unquote_plus(video_uploader)
1898 self._downloader.report_warning('unable to extract uploader name')
1901 video_uploader_id = None
1902 video_uploader_url = None
1904 r'<link itemprop="url" href="(?P<uploader_url>https?://www\.youtube\.com/(?:user|channel)/(?P<uploader_id>[^"]+))">',
1906 if mobj is not None:
1907 video_uploader_id = mobj.group('uploader_id')
1908 video_uploader_url = mobj.group('uploader_url')
1910 self._downloader.report_warning('unable to extract uploader nickname')
1912 channel_id = self._html_search_meta(
1913 'channelId', video_webpage, 'channel id')
1914 channel_url = 'http://www.youtube.com/channel/%s' % channel_id if channel_id else None
1917 # We try first to get a high quality image:
1918 m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
1919 video_webpage, re.DOTALL)
1920 if m_thumb is not None:
1921 video_thumbnail = m_thumb.group(1)
1922 elif 'thumbnail_url' not in video_info:
1923 self._downloader.report_warning('unable to extract video thumbnail')
1924 video_thumbnail = None
1925 else: # don't panic if we can't find it
1926 video_thumbnail = compat_urllib_parse_unquote_plus(video_info['thumbnail_url'][0])
1929 upload_date = self._html_search_meta(
1930 'datePublished', video_webpage, 'upload date', default=None)
1932 upload_date = self._search_regex(
1933 [r'(?s)id="eow-date.*?>(.*?)</span>',
1934 r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'],
1935 video_webpage, 'upload date', default=None)
1936 upload_date = unified_strdate(upload_date)
1938 video_license = self._html_search_regex(
1939 r'<h4[^>]+class="title"[^>]*>\s*License\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li',
1940 video_webpage, 'license', default=None)
1942 m_music = re.search(
1944 <h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*
1952 \bhref=["\']/red[^>]*>| # drop possible
1953 >\s*Listen ad-free with YouTube Red # YouTube Red ad
1960 video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
1961 video_creator = clean_html(m_music.group('creator'))
1963 video_alt_title = video_creator = None
1965 def extract_meta(field):
1966 return self._html_search_regex(
1967 r'<h4[^>]+class="title"[^>]*>\s*%s\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li>\s*' % field,
1968 video_webpage, field, default=None)
1970 track = extract_meta('Song')
1971 artist = extract_meta('Artist')
1973 m_episode = re.search(
1974 r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
1977 series = m_episode.group('series')
1978 season_number = int(m_episode.group('season'))
1979 episode_number = int(m_episode.group('episode'))
1981 series = season_number = episode_number = None
1983 m_cat_container = self._search_regex(
1984 r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
1985 video_webpage, 'categories', default=None)
1987 category = self._html_search_regex(
1988 r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
1990 video_categories = None if category is None else [category]
1992 video_categories = None
1995 unescapeHTML(m.group('content'))
1996 for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
1998 def _extract_count(count_name):
1999 return str_to_int(self._search_regex(
2000 r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>'
2001 % re.escape(count_name),
2002 video_webpage, count_name, default=None))
2004 like_count = _extract_count('like')
2005 dislike_count = _extract_count('dislike')
2008 video_subtitles = self.extract_subtitles(video_id, video_webpage)
2009 automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
2011 video_duration = try_get(
2012 video_info, lambda x: int_or_none(x['length_seconds'][0]))
2013 if not video_duration:
2014 video_duration = parse_duration(self._html_search_meta(
2015 'duration', video_webpage, 'video duration'))
2018 video_annotations = None
2019 if self._downloader.params.get('writeannotations', False):
2020 video_annotations = self._extract_annotations(video_id)
2022 chapters = self._extract_chapters(description_original, video_duration)
2024 # Look for the DASH manifest
2025 if self._downloader.params.get('youtube_include_dash_manifest', True):
2026 dash_mpd_fatal = True
2027 for mpd_url in dash_mpds:
2030 def decrypt_sig(mobj):
2032 dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
2033 return '/signature/%s' % dec_s
2035 mpd_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, mpd_url)
2037 for df in self._extract_mpd_formats(
2038 mpd_url, video_id, fatal=dash_mpd_fatal,
2039 formats_dict=self._formats):
2040 if not df.get('filesize'):
2041 df['filesize'] = _extract_filesize(df['url'])
2042 # Do not overwrite DASH format found in some previous DASH manifest
2043 if df['format_id'] not in dash_formats:
2044 dash_formats[df['format_id']] = df
2045 # Additional DASH manifests may end up in HTTP Error 403 therefore
2046 # allow them to fail without bug report message if we already have
2047 # some DASH manifest succeeded. This is temporary workaround to reduce
2048 # burst of bug reports until we figure out the reason and whether it
2049 # can be fixed at all.
2050 dash_mpd_fatal = False
2051 except (ExtractorError, KeyError) as e:
2052 self.report_warning(
2053 'Skipping DASH manifest: %r' % e, video_id)
2055 # Remove the formats we found through non-DASH, they
2056 # contain less info and it can be wrong, because we use
2057 # fixed values (for example the resolution). See
2058 # https://github.com/rg3/youtube-dl/issues/5774 for an
2060 formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
2061 formats.extend(dash_formats.values())
2063 # Check for malformed aspect ratio
2064 stretched_m = re.search(
2065 r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
2068 w = float(stretched_m.group('w'))
2069 h = float(stretched_m.group('h'))
2070 # yt:stretch may hold invalid ratio data (e.g. for Q39EVAstoRM ratio is 17:0).
2071 # We will only process correct ratios.
2075 if f.get('vcodec') != 'none':
2076 f['stretched_ratio'] = ratio
2078 self._sort_formats(formats)
2080 self.mark_watched(video_id, video_info)
2084 'uploader': video_uploader,
2085 'uploader_id': video_uploader_id,
2086 'uploader_url': video_uploader_url,
2087 'channel_id': channel_id,
2088 'channel_url': channel_url,
2089 'upload_date': upload_date,
2090 'license': video_license,
2091 'creator': video_creator or artist,
2092 'title': video_title,
2093 'alt_title': video_alt_title or track,
2094 'thumbnail': video_thumbnail,
2095 'description': video_description,
2096 'categories': video_categories,
2098 'subtitles': video_subtitles,
2099 'automatic_captions': automatic_captions,
2100 'duration': video_duration,
2101 'age_limit': 18 if age_gate else 0,
2102 'annotations': video_annotations,
2103 'chapters': chapters,
2104 'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
2105 'view_count': view_count,
2106 'like_count': like_count,
2107 'dislike_count': dislike_count,
2108 'average_rating': float_or_none(video_info.get('avg_rating', [None])[0]),
2111 'start_time': start_time,
2112 'end_time': end_time,
2114 'season_number': season_number,
2115 'episode_number': episode_number,
2121 class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
2122 IE_DESC = 'YouTube.com playlists'
2123 _VALID_URL = r"""(?x)(?:
2129 (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/(?:videoseries|[0-9A-Za-z_-]{11}))
2130 \? (?:.*?[&;])*? (?:p|a|list)=
2133 youtu\.be/[0-9A-Za-z_-]{11}\?.*?\blist=
2136 (?:PL|LL|EC|UU|FL|RD|UL|TL|OLAK5uy_)?[0-9A-Za-z-_]{10,}
2137 # Top tracks, they can also include dots
2143 )""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
2144 _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
2145 _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&[^"]*?index=(?P<index>\d+)(?:[^>]+>(?P<title>[^<]+))?'
2146 IE_NAME = 'youtube:playlist'
2148 'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
2150 'title': 'ytdl test PL',
2151 'id': 'PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
2153 'playlist_count': 3,
2155 'url': 'https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
2157 'id': 'PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
2158 'title': 'YDL_Empty_List',
2160 'playlist_count': 0,
2161 'skip': 'This playlist is private',
2163 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
2164 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2166 'title': '29C3: Not my department',
2167 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2169 'playlist_count': 95,
2171 'note': 'issue #673',
2172 'url': 'PLBB231211A4F62143',
2174 'title': '[OLD]Team Fortress 2 (Class-based LP)',
2175 'id': 'PLBB231211A4F62143',
2177 'playlist_mincount': 26,
2179 'note': 'Large playlist',
2180 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
2182 'title': 'Uploads from Cauchemar',
2183 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
2185 'playlist_mincount': 799,
2187 'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
2189 'title': 'YDL_safe_search',
2190 'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
2192 'playlist_count': 2,
2193 'skip': 'This playlist is private',
2196 'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
2197 'playlist_count': 4,
2200 'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
2203 'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
2204 'playlist_mincount': 485,
2206 'title': '2017 華語最新單曲 (2/24更新)',
2207 'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
2210 'note': 'Embedded SWF player',
2211 'url': 'https://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
2212 'playlist_count': 4,
2215 'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ',
2218 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
2219 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
2221 'title': 'Uploads from Interstellar Movie',
2222 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
2224 'playlist_mincount': 21,
2226 # Playlist URL that does not actually serve a playlist
2227 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
2229 'id': 'FqZTN594JQw',
2231 'title': "Smiley's People 01 detective, Adventure Series, Action",
2232 'uploader': 'STREEM',
2233 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
2234 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
2235 'upload_date': '20150526',
2236 'license': 'Standard YouTube License',
2237 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
2238 'categories': ['People & Blogs'],
2241 'dislike_count': int,
2244 'skip_download': True,
2246 'add_ie': [YoutubeIE.ie_key()],
2248 'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
2250 'id': 'yeWKywCrFtk',
2252 'title': 'Small Scale Baler and Braiding Rugs',
2253 'uploader': 'Backus-Page House Museum',
2254 'uploader_id': 'backuspagemuseum',
2255 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
2256 'upload_date': '20161008',
2257 'license': 'Standard YouTube License',
2258 'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
2259 'categories': ['Nonprofits & Activism'],
2262 'dislike_count': int,
2266 'skip_download': True,
2269 'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
2270 'only_matching': True,
2272 'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
2273 'only_matching': True,
2275 # music album playlist
2276 'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
2277 'only_matching': True,
2280 def _real_initialize(self):
2283 def _extract_mix(self, playlist_id):
2284 # The mixes are generated from a single video
2285 # the id of the playlist is just 'RD' + video_id
2287 last_id = playlist_id[-11:]
2288 for n in itertools.count(1):
2289 url = 'https://youtube.com/watch?v=%s&list=%s' % (last_id, playlist_id)
2290 webpage = self._download_webpage(
2291 url, playlist_id, 'Downloading page {0} of Youtube mix'.format(n))
2292 new_ids = orderedSet(re.findall(
2293 r'''(?xs)data-video-username=".*?".*?
2294 href="/watch\?v=([0-9A-Za-z_-]{11})&[^"]*?list=%s''' % re.escape(playlist_id),
2296 # Fetch new pages until all the videos are repeated, it seems that
2297 # there are always 51 unique videos.
2298 new_ids = [_id for _id in new_ids if _id not in ids]
2304 url_results = self._ids_to_results(ids)
2306 search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage)
2308 search_title('playlist-title') or
2309 search_title('title long-title') or
2310 search_title('title'))
2311 title = clean_html(title_span)
2313 return self.playlist_result(url_results, playlist_id, title)
2315 def _extract_playlist(self, playlist_id):
2316 url = self._TEMPLATE_URL % playlist_id
2317 page = self._download_webpage(url, playlist_id)
2319 # the yt-alert-message now has tabindex attribute (see https://github.com/rg3/youtube-dl/issues/11604)
2320 for match in re.findall(r'<div class="yt-alert-message"[^>]*>([^<]+)</div>', page):
2321 match = match.strip()
2322 # Check if the playlist exists or is private
2323 mobj = re.match(r'[^<]*(?:The|This) playlist (?P<reason>does not exist|is private)[^<]*', match)
2325 reason = mobj.group('reason')
2326 message = 'This playlist %s' % reason
2327 if 'private' in reason:
2328 message += ', use --username or --netrc to access it'
2330 raise ExtractorError(message, expected=True)
2331 elif re.match(r'[^<]*Invalid parameters[^<]*', match):
2332 raise ExtractorError(
2333 'Invalid parameters. Maybe URL is incorrect.',
2335 elif re.match(r'[^<]*Choose your language[^<]*', match):
2338 self.report_warning('Youtube gives an alert message: ' + match)
2340 playlist_title = self._html_search_regex(
2341 r'(?s)<h1 class="pl-header-title[^"]*"[^>]*>\s*(.*?)\s*</h1>',
2342 page, 'title', default=None)
2344 _UPLOADER_BASE = r'class=["\']pl-header-details[^>]+>\s*<li>\s*<a[^>]+\bhref='
2345 uploader = self._search_regex(
2346 r'%s["\']/(?:user|channel)/[^>]+>([^<]+)' % _UPLOADER_BASE,
2347 page, 'uploader', default=None)
2349 r'%s(["\'])(?P<path>/(?:user|channel)/(?P<uploader_id>.+?))\1' % _UPLOADER_BASE,
2352 uploader_id = mobj.group('uploader_id')
2353 uploader_url = compat_urlparse.urljoin(url, mobj.group('path'))
2355 uploader_id = uploader_url = None
2359 if not playlist_title:
2361 # Some playlist URLs don't actually serve a playlist (e.g.
2362 # https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4)
2363 next(self._entries(page, playlist_id))
2364 except StopIteration:
2367 playlist = self.playlist_result(
2368 self._entries(page, playlist_id), playlist_id, playlist_title)
2370 'uploader': uploader,
2371 'uploader_id': uploader_id,
2372 'uploader_url': uploader_url,
2375 return has_videos, playlist
2377 def _check_download_just_video(self, url, playlist_id):
2378 # Check if it's a video-specific URL
2379 query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
2380 video_id = query_dict.get('v', [None])[0] or self._search_regex(
2381 r'(?:(?:^|//)youtu\.be/|youtube\.com/embed/(?!videoseries))([0-9A-Za-z_-]{11})', url,
2382 'video id', default=None)
2384 if self._downloader.params.get('noplaylist'):
2385 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
2386 return video_id, self.url_result(video_id, 'Youtube', video_id=video_id)
2388 self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
2389 return video_id, None
2392 def _real_extract(self, url):
2393 # Extract playlist id
2394 mobj = re.match(self._VALID_URL, url)
2396 raise ExtractorError('Invalid URL: %s' % url)
2397 playlist_id = mobj.group(1) or mobj.group(2)
2399 video_id, video = self._check_download_just_video(url, playlist_id)
2403 if playlist_id.startswith(('RD', 'UL', 'PU')):
2404 # Mixes require a custom extraction process
2405 return self._extract_mix(playlist_id)
2407 has_videos, playlist = self._extract_playlist(playlist_id)
2408 if has_videos or not video_id:
2411 # Some playlist URLs don't actually serve a playlist (see
2412 # https://github.com/rg3/youtube-dl/issues/10537).
2413 # Fallback to plain video extraction if there is a video id
2414 # along with playlist id.
2415 return self.url_result(video_id, 'Youtube', video_id=video_id)
2418 class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
2419 IE_DESC = 'YouTube.com channels'
2420 _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/(?P<id>[0-9A-Za-z_-]+)'
2421 _TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos'
2422 _VIDEO_RE = r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?'
2423 IE_NAME = 'youtube:channel'
2425 'note': 'paginated channel',
2426 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
2427 'playlist_mincount': 91,
2429 'id': 'UUKfVa3S1e4PHvxWcwyMMg8w',
2430 'title': 'Uploads from lex will',
2433 'note': 'Age restricted channel',
2434 # from https://www.youtube.com/user/DeusExOfficial
2435 'url': 'https://www.youtube.com/channel/UCs0ifCMCm1icqRbqhUINa0w',
2436 'playlist_mincount': 64,
2438 'id': 'UUs0ifCMCm1icqRbqhUINa0w',
2439 'title': 'Uploads from Deus Ex',
2444 def suitable(cls, url):
2445 return (False if YoutubePlaylistsIE.suitable(url) or YoutubeLiveIE.suitable(url)
2446 else super(YoutubeChannelIE, cls).suitable(url))
2448 def _build_template_url(self, url, channel_id):
2449 return self._TEMPLATE_URL % channel_id
2451 def _real_extract(self, url):
2452 channel_id = self._match_id(url)
2454 url = self._build_template_url(url, channel_id)
2456 # Channel by page listing is restricted to 35 pages of 30 items, i.e. 1050 videos total (see #5778)
2457 # Workaround by extracting as a playlist if managed to obtain channel playlist URL
2458 # otherwise fallback on channel by page extraction
2459 channel_page = self._download_webpage(
2460 url + '?view=57', channel_id,
2461 'Downloading channel page', fatal=False)
2462 if channel_page is False:
2463 channel_playlist_id = False
2465 channel_playlist_id = self._html_search_meta(
2466 'channelId', channel_page, 'channel id', default=None)
2467 if not channel_playlist_id:
2468 channel_url = self._html_search_meta(
2469 ('al:ios:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad'),
2470 channel_page, 'channel url', default=None)
2472 channel_playlist_id = self._search_regex(
2473 r'vnd\.youtube://user/([0-9A-Za-z_-]+)',
2474 channel_url, 'channel id', default=None)
2475 if channel_playlist_id and channel_playlist_id.startswith('UC'):
2476 playlist_id = 'UU' + channel_playlist_id[2:]
2477 return self.url_result(
2478 compat_urlparse.urljoin(url, '/playlist?list=%s' % playlist_id), 'YoutubePlaylist')
2480 channel_page = self._download_webpage(url, channel_id, 'Downloading page #1')
2481 autogenerated = re.search(r'''(?x)
2483 channel-header-autogenerated-label|
2484 yt-channel-title-autogenerated
2485 )[^"]*"''', channel_page) is not None
2488 # The videos are contained in a single page
2489 # the ajax pages can't be used, they are empty
2492 video_id, 'Youtube', video_id=video_id,
2493 video_title=video_title)
2494 for video_id, video_title in self.extract_videos_from_page(channel_page)]
2495 return self.playlist_result(entries, channel_id)
2498 next(self._entries(channel_page, channel_id))
2499 except StopIteration:
2500 alert_message = self._html_search_regex(
2501 r'(?s)<div[^>]+class=(["\']).*?\byt-alert-message\b.*?\1[^>]*>(?P<alert>[^<]+)</div>',
2502 channel_page, 'alert', default=None, group='alert')
2504 raise ExtractorError('Youtube said: %s' % alert_message, expected=True)
2506 return self.playlist_result(self._entries(channel_page, channel_id), channel_id)
2509 class YoutubeUserIE(YoutubeChannelIE):
2510 IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
2511 _VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:(?P<user>user|c)/)?(?!(?:attribution_link|watch|results|shared)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
2512 _TEMPLATE_URL = 'https://www.youtube.com/%s/%s/videos'
2513 IE_NAME = 'youtube:user'
2516 'url': 'https://www.youtube.com/user/TheLinuxFoundation',
2517 'playlist_mincount': 320,
2519 'id': 'UUfX55Sx5hEFjoC3cNs6mCUQ',
2520 'title': 'Uploads from The Linux Foundation',
2523 # Only available via https://www.youtube.com/c/12minuteathlete/videos
2524 # but not https://www.youtube.com/user/12minuteathlete/videos
2525 'url': 'https://www.youtube.com/c/12minuteathlete/videos',
2526 'playlist_mincount': 249,
2528 'id': 'UUVjM-zV6_opMDx7WYxnjZiQ',
2529 'title': 'Uploads from 12 Minute Athlete',
2532 'url': 'ytuser:phihag',
2533 'only_matching': True,
2535 'url': 'https://www.youtube.com/c/gametrailers',
2536 'only_matching': True,
2538 'url': 'https://www.youtube.com/gametrailers',
2539 'only_matching': True,
2541 # This channel is not available, geo restricted to JP
2542 'url': 'https://www.youtube.com/user/kananishinoSMEJ/videos',
2543 'only_matching': True,
2547 def suitable(cls, url):
2548 # Don't return True if the url can be extracted with other youtube
2549 # extractor, the regex would is too permissive and it would match.
2550 other_yt_ies = iter(klass for (name, klass) in globals().items() if name.startswith('Youtube') and name.endswith('IE') and klass is not cls)
2551 if any(ie.suitable(url) for ie in other_yt_ies):
2554 return super(YoutubeUserIE, cls).suitable(url)
2556 def _build_template_url(self, url, channel_id):
2557 mobj = re.match(self._VALID_URL, url)
2558 return self._TEMPLATE_URL % (mobj.group('user') or 'user', mobj.group('id'))
2561 class YoutubeLiveIE(YoutubeBaseInfoExtractor):
2562 IE_DESC = 'YouTube.com live streams'
2563 _VALID_URL = r'(?P<base_url>https?://(?:\w+\.)?youtube\.com/(?:(?:user|channel|c)/)?(?P<id>[^/]+))/live'
2564 IE_NAME = 'youtube:live'
2567 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
2569 'id': 'a48o2S1cPoo',
2571 'title': 'The Young Turks - Live Main Show',
2572 'uploader': 'The Young Turks',
2573 'uploader_id': 'TheYoungTurks',
2574 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
2575 'upload_date': '20150715',
2576 'license': 'Standard YouTube License',
2577 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
2578 'categories': ['News & Politics'],
2579 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
2581 'dislike_count': int,
2584 'skip_download': True,
2587 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
2588 'only_matching': True,
2590 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
2591 'only_matching': True,
2593 'url': 'https://www.youtube.com/TheYoungTurks/live',
2594 'only_matching': True,
2597 def _real_extract(self, url):
2598 mobj = re.match(self._VALID_URL, url)
2599 channel_id = mobj.group('id')
2600 base_url = mobj.group('base_url')
2601 webpage = self._download_webpage(url, channel_id, fatal=False)
2603 page_type = self._og_search_property(
2604 'type', webpage, 'page type', default='')
2605 video_id = self._html_search_meta(
2606 'videoId', webpage, 'video id', default=None)
2607 if page_type.startswith('video') and video_id and re.match(
2608 r'^[0-9A-Za-z_-]{11}$', video_id):
2609 return self.url_result(video_id, YoutubeIE.ie_key())
2610 return self.url_result(base_url)
2613 class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
2614 IE_DESC = 'YouTube.com user/channel playlists'
2615 _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/(?:user|channel)/(?P<id>[^/]+)/playlists'
2616 IE_NAME = 'youtube:playlists'
2619 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
2620 'playlist_mincount': 4,
2622 'id': 'ThirstForScience',
2623 'title': 'Thirst for Science',
2626 # with "Load more" button
2627 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
2628 'playlist_mincount': 70,
2631 'title': 'Игорь Клейнер',
2634 'url': 'https://www.youtube.com/channel/UCiU1dHvZObB2iP6xkJ__Icw/playlists',
2635 'playlist_mincount': 17,
2637 'id': 'UCiU1dHvZObB2iP6xkJ__Icw',
2638 'title': 'Chem Player',
class YoutubeSearchBaseInfoExtractor(YoutubePlaylistBaseInfoExtractor):
    """Shared results-page scraping base for the search extractors."""
    # Matches watch links on a results page; the title group is optional
    # because not every results-markup variant carries the title inline.
    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})(?:[^"]*"[^>]+\btitle="(?P<title>[^"]+))?'
2647 class YoutubeSearchIE(SearchInfoExtractor, YoutubeSearchBaseInfoExtractor):
2648 IE_DESC = 'YouTube.com searches'
2649 # there doesn't appear to be a real limit, for example if you search for
2650 # 'python' you get more than 8.000.000 results
2651 _MAX_RESULTS = float('inf')
2652 IE_NAME = 'youtube:search'
2653 _SEARCH_KEY = 'ytsearch'
2654 _EXTRA_QUERY_ARGS = {}
2657 def _get_n_results(self, query, n):
2658 """Get a specified number of results for a query"""
2664 'search_query': query.encode('utf-8'),
2666 url_query.update(self._EXTRA_QUERY_ARGS)
2667 result_url = 'https://www.youtube.com/results?' + compat_urllib_parse_urlencode(url_query)
2669 for pagenum in itertools.count(1):
2670 data = self._download_json(
2671 result_url, video_id='query "%s"' % query,
2672 note='Downloading page %s' % pagenum,
2673 errnote='Unable to download API page',
2674 query={'spf': 'navigate'})
2675 html_content = data[1]['body']['content']
2677 if 'class="search-message' in html_content:
2678 raise ExtractorError(
2679 '[youtube] No video results', expected=True)
2681 new_videos = list(self._process_page(html_content))
2682 videos += new_videos
2683 if not new_videos or len(videos) > limit:
2685 next_link = self._html_search_regex(
2686 r'href="(/results\?[^"]*\bsp=[^"]+)"[^>]*>\s*<span[^>]+class="[^"]*\byt-uix-button-content\b[^"]*"[^>]*>Next',
2687 html_content, 'next link', default=None)
2688 if next_link is None:
2690 result_url = compat_urlparse.urljoin('https://www.youtube.com/', next_link)
2694 return self.playlist_result(videos, query)
class YoutubeSearchDateIE(YoutubeSearchIE):
    """``ytsearchdate`` — identical to ``ytsearch``, but newest videos first."""
    IE_DESC = 'YouTube.com searches, newest videos first'
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    _SEARCH_KEY = 'ytsearchdate'
    # Extra query parameter that switches result ordering to upload date.
    _EXTRA_QUERY_ARGS = {'search_sort': 'video_date_uploaded'}
2704 class YoutubeSearchURLIE(YoutubeSearchBaseInfoExtractor):
2705 IE_DESC = 'YouTube.com search URLs'
2706 IE_NAME = 'youtube:search_url'
2707 _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)'
2709 'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
2710 'playlist_mincount': 5,
2712 'title': 'youtube-dl test video',
2715 'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
2716 'only_matching': True,
2719 def _real_extract(self, url):
2720 mobj = re.match(self._VALID_URL, url)
2721 query = compat_urllib_parse_unquote_plus(mobj.group('query'))
2722 webpage = self._download_webpage(url, query)
2723 return self.playlist_result(self._process_page(webpage), playlist_title=query)
2726 class YoutubeShowIE(YoutubePlaylistsBaseInfoExtractor):
2727 IE_DESC = 'YouTube.com (multi-season) shows'
2728 _VALID_URL = r'https?://(?:www\.)?youtube\.com/show/(?P<id>[^?#]*)'
2729 IE_NAME = 'youtube:show'
2731 'url': 'https://www.youtube.com/show/airdisasters',
2732 'playlist_mincount': 5,
2734 'id': 'airdisasters',
2735 'title': 'Air Disasters',
2739 def _real_extract(self, url):
2740 playlist_id = self._match_id(url)
2741 return super(YoutubeShowIE, self)._real_extract(
2742 'https://www.youtube.com/show/%s/playlists' % playlist_id)
2745 class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
2747 Base class for feed extractors
2748 Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
2750 _LOGIN_REQUIRED = True
2754 return 'youtube:%s' % self._FEED_NAME
2756 def _real_initialize(self):
2759 def _entries(self, page):
2760 # The extraction process is the same as for playlists, but the regex
2761 # for the video ids doesn't contain an index
2763 more_widget_html = content_html = page
2764 for page_num in itertools.count(1):
2765 matches = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)
2767 # 'recommended' feed has infinite 'load more' and each new portion spins
2768 # the same videos in (sometimes) slightly different order, so we'll check
2769 # for unicity and break when portion has no new videos
2770 new_ids = list(filter(lambda video_id: video_id not in ids, orderedSet(matches)))
2776 for entry in self._ids_to_results(new_ids):
2779 mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
2783 more = self._download_json(
2784 'https://youtube.com/%s' % mobj.group('more'), self._PLAYLIST_TITLE,
2785 'Downloading page #%s' % page_num,
2786 transform_source=uppercase_escape)
2787 content_html = more['content_html']
2788 more_widget_html = more['load_more_widget_html']
2790 def _real_extract(self, url):
2791 page = self._download_webpage(
2792 'https://www.youtube.com/feed/%s' % self._FEED_NAME,
2793 self._PLAYLIST_TITLE)
2794 return self.playlist_result(
2795 self._entries(page), playlist_title=self._PLAYLIST_TITLE)
2798 class YoutubeWatchLaterIE(YoutubePlaylistIE):
2799 IE_NAME = 'youtube:watchlater'
2800 IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
2801 _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:feed/watch_later|(?:playlist|watch)\?(?:.+&)?list=WL)|:ytwatchlater'
2804 'url': 'https://www.youtube.com/playlist?list=WL',
2805 'only_matching': True,
2807 'url': 'https://www.youtube.com/watch?v=bCNU9TrbiRk&index=1&list=WL',
2808 'only_matching': True,
2811 def _real_extract(self, url):
2812 _, video = self._check_download_just_video(url, 'WL')
2815 _, playlist = self._extract_playlist('WL')
2819 class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
2820 IE_NAME = 'youtube:favorites'
2821 IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
2822 _VALID_URL = r'https?://(?:www\.)?youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
2823 _LOGIN_REQUIRED = True
2825 def _real_extract(self, url):
2826 webpage = self._download_webpage('https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
2827 playlist_id = self._search_regex(r'list=(.+?)["&]', webpage, 'favourites playlist id')
2828 return self.url_result(playlist_id, 'YoutubePlaylist')
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """``:ytrec`` — the logged-in user's recommended-videos feed."""
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = 'Youtube Recommended videos'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/recommended|:ytrec(?:ommended)?'
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """``:ytsubs`` — the logged-in user's subscriptions feed."""
    IE_DESC = 'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = 'Youtube Subscriptions'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """``:ythistory`` — the logged-in user's watch-history feed."""
    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
    _FEED_NAME = 'history'
    _PLAYLIST_TITLE = 'Youtube History'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/history|:ythistory'
2852 class YoutubeTruncatedURLIE(InfoExtractor):
2853 IE_NAME = 'youtube:truncated_url'
2854 IE_DESC = False # Do not list
2855 _VALID_URL = r'''(?x)
2857 (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
2860 annotation_id=annotation_[^&]+|
2866 attribution_link\?a=[^&]+
2872 'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
2873 'only_matching': True,
2875 'url': 'https://www.youtube.com/watch?',
2876 'only_matching': True,
2878 'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
2879 'only_matching': True,
2881 'url': 'https://www.youtube.com/watch?feature=foo',
2882 'only_matching': True,
2884 'url': 'https://www.youtube.com/watch?hl=en-GB',
2885 'only_matching': True,
2887 'url': 'https://www.youtube.com/watch?t=2372',
2888 'only_matching': True,
2891 def _real_extract(self, url):
2892 raise ExtractorError(
2893 'Did you forget to quote the URL? Remember that & is a meta '
2894 'character in most shells, so you want to put the URL in quotes, '
2896 '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
2897 ' or simply youtube-dl BaW_jenozKc .',
2901 class YoutubeTruncatedIDIE(InfoExtractor):
2902 IE_NAME = 'youtube:truncated_id'
2903 IE_DESC = False # Do not list
2904 _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'
2907 'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
2908 'only_matching': True,
2911 def _real_extract(self, url):
2912 video_id = self._match_id(url)
2913 raise ExtractorError(
2914 'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url),