[youtube] Improve player id extraction and add tests
[youtube-dl] youtube_dl/extractor/youtube.py
1 # coding: utf-8
2
3 from __future__ import unicode_literals
4
5
6 import itertools
7 import json
8 import os.path
9 import random
10 import re
11 import time
12 import traceback
13
14 from .common import InfoExtractor, SearchInfoExtractor
15 from ..jsinterp import JSInterpreter
16 from ..swfinterp import SWFInterpreter
17 from ..compat import (
18     compat_chr,
19     compat_HTTPError,
20     compat_kwargs,
21     compat_parse_qs,
22     compat_urllib_parse_unquote,
23     compat_urllib_parse_unquote_plus,
24     compat_urllib_parse_urlencode,
25     compat_urllib_parse_urlparse,
26     compat_urlparse,
27     compat_str,
28 )
29 from ..utils import (
30     bool_or_none,
31     clean_html,
32     error_to_compat_str,
33     extract_attributes,
34     ExtractorError,
35     float_or_none,
36     get_element_by_attribute,
37     get_element_by_id,
38     int_or_none,
39     mimetype2ext,
40     orderedSet,
41     parse_codecs,
42     parse_duration,
43     remove_quotes,
44     remove_start,
45     smuggle_url,
46     str_or_none,
47     str_to_int,
48     try_get,
49     unescapeHTML,
50     unified_strdate,
51     unsmuggle_url,
52     uppercase_escape,
53     url_or_none,
54     urlencode_postdata,
55 )
56
57
58 class YoutubeBaseInfoExtractor(InfoExtractor):
59     """Provide base functions for Youtube extractors"""
60     _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
61     _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'
62
63     _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
64     _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
65     _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'
66
67     _NETRC_MACHINE = 'youtube'
68     # If True, an error is raised when no login info is provided
69     _LOGIN_REQUIRED = False
70
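    # The prefixes cover the common YouTube list types (e.g. PL... for regular
    # playlists, UU... for channel uploads, RD... for mixes); OLAK5uy_ marks
    # auto-generated album playlists.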
71     _PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}'
72
73     def _set_language(self):
74         self._set_cookie(
75             '.youtube.com', 'PREF', 'f1=50000000&hl=en',
76             # YouTube sets the expire time to about two months
77             expire_time=time.time() + 2 * 30 * 24 * 3600)
78
79     def _ids_to_results(self, ids):
80         return [
81             self.url_result(vid_id, 'Youtube', video_id=vid_id)
82             for vid_id in ids]
83
84     def _login(self):
85         """
86         Attempt to log in to YouTube.
87         True is returned if successful or skipped.
88         False is returned if login failed.
89
90         If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
91         """
92         username, password = self._get_login_info()
93         # No authentication to be performed
94         if username is None:
95             if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
96                 raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
97             return True
98
99         login_page = self._download_webpage(
100             self._LOGIN_URL, None,
101             note='Downloading login page',
102             errnote='unable to fetch login page', fatal=False)
103         if login_page is False:
104             return
105
106         login_form = self._hidden_inputs(login_page)
107
108         def req(url, f_req, note, errnote):
109             data = login_form.copy()
110             data.update({
111                 'pstMsg': 1,
112                 'checkConnection': 'youtube',
113                 'checkedDomains': 'youtube',
114                 'hl': 'en',
115                 'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
116                 'f.req': json.dumps(f_req),
117                 'flowName': 'GlifWebSignIn',
118                 'flowEntry': 'ServiceLogin',
119                 # TODO: reverse actual botguard identifier generation algo
120                 'bgRequest': '["identifier",""]',
121             })
122             return self._download_json(
123                 url, None, note=note, errnote=errnote,
124                 transform_source=lambda s: re.sub(r'^[^[]*', '', s),
125                 fatal=False,
126                 data=urlencode_postdata(data), headers={
127                     'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
128                     'Google-Accounts-XSRF': 1,
129                 })
130
131         def warn(message):
132             self._downloader.report_warning(message)
133
134         lookup_req = [
135             username,
136             None, [], None, 'US', None, None, 2, False, True,
137             [
138                 None, None,
139                 [2, 1, None, 1,
140                  'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
141                  None, [], 4],
142                 1, [None, None, []], None, None, None, True
143             ],
144             username,
145         ]
146
147         lookup_results = req(
148             self._LOOKUP_URL, lookup_req,
149             'Looking up account info', 'Unable to look up account info')
150
151         if lookup_results is False:
152             return False
153
154         user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
155         if not user_hash:
156             warn('Unable to extract user hash')
157             return False
158
159         challenge_req = [
160             user_hash,
161             None, 1, None, [1, None, None, None, [password, None, True]],
162             [
163                 None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
164                 1, [None, None, []], None, None, None, True
165             ]]
166
167         challenge_results = req(
168             self._CHALLENGE_URL, challenge_req,
169             'Logging in', 'Unable to log in')
170
171         if challenge_results is False:
172             return
173
174         login_res = try_get(challenge_results, lambda x: x[0][5], list)
175         if login_res:
176             login_msg = try_get(login_res, lambda x: x[5], compat_str)
177             warn(
178                 'Unable to login: %s' % ('Invalid password'
179                 if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg))
180             return False
181
182         res = try_get(challenge_results, lambda x: x[0][-1], list)
183         if not res:
184             warn('Unable to extract result entry')
185             return False
186
187         login_challenge = try_get(res, lambda x: x[0][0], list)
188         if login_challenge:
189             challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
190             if challenge_str == 'TWO_STEP_VERIFICATION':
191                 # SEND_SUCCESS - TFA code has been successfully sent to phone
192                 # QUOTA_EXCEEDED - reached the limit of TFA codes
193                 status = try_get(login_challenge, lambda x: x[5], compat_str)
194                 if status == 'QUOTA_EXCEEDED':
195                     warn('Exceeded the limit of TFA codes, try later')
196                     return False
197
198                 tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
199                 if not tl:
200                     warn('Unable to extract TL')
201                     return False
202
203                 tfa_code = self._get_tfa_info('2-step verification code')
204
205                 if not tfa_code:
206                     warn(
207                         'Two-factor authentication required. Provide it either interactively or with --twofactor <code> '
208                         '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
209                     return False
210
211                 tfa_code = remove_start(tfa_code, 'G-')
212
213                 tfa_req = [
214                     user_hash, None, 2, None,
215                     [
216                         9, None, None, None, None, None, None, None,
217                         [None, tfa_code, True, 2]
218                     ]]
219
220                 tfa_results = req(
221                     self._TFA_URL.format(tl), tfa_req,
222                     'Submitting TFA code', 'Unable to submit TFA code')
223
224                 if tfa_results is False:
225                     return False
226
227                 tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
228                 if tfa_res:
229                     tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
230                     warn(
231                         'Unable to finish TFA: %s' % ('Invalid TFA code'
232                         if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg))
233                     return False
234
235                 check_cookie_url = try_get(
236                     tfa_results, lambda x: x[0][-1][2], compat_str)
237             else:
238                 CHALLENGES = {
239                     'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
240                     'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
241                     'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
242                 }
243                 challenge = CHALLENGES.get(
244                     challenge_str,
245                     '%s returned error %s.' % (self.IE_NAME, challenge_str))
246                 warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
247                 return False
248         else:
249             check_cookie_url = try_get(res, lambda x: x[2], compat_str)
250
251         if not check_cookie_url:
252             warn('Unable to extract CheckCookie URL')
253             return False
254
255         check_cookie_results = self._download_webpage(
256             check_cookie_url, None, 'Checking cookie', fatal=False)
257
258         if check_cookie_results is False:
259             return False
260
261         if 'https://myaccount.google.com/' not in check_cookie_results:
262             warn('Unable to log in')
263             return False
264
265         return True
266
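    # Force disable_polymer=true on every page request so that YouTube serves
    # the legacy (non-Polymer) HTML layout that this extractor parses.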
267     def _download_webpage_handle(self, *args, **kwargs):
268         query = kwargs.get('query', {}).copy()
269         query['disable_polymer'] = 'true'
270         kwargs['query'] = query
271         return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
272             *args, **compat_kwargs(kwargs))
273
274     def _real_initialize(self):
275         if self._downloader is None:
276             return
277         self._set_language()
278         if not self._login():
279             return
280
281
282 class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
283     # Extract entries from page with "Load more" button
284     def _entries(self, page, playlist_id):
285         more_widget_html = content_html = page
286         for page_num in itertools.count(1):
287             for entry in self._process_page(content_html):
288                 yield entry
289
290             mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
291             if not mobj:
292                 break
293
294             count = 0
295             retries = 3
296             while count <= retries:
297                 try:
298                     # Downloading page may result in intermittent 5xx HTTP error
299                     # that is usually worked around with a retry
300                     more = self._download_json(
301                         'https://youtube.com/%s' % mobj.group('more'), playlist_id,
302                         'Downloading page #%s%s'
303                         % (page_num, ' (retry #%d)' % count if count else ''),
304                         transform_source=uppercase_escape)
305                     break
306                 except ExtractorError as e:
307                     if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503):
308                         count += 1
309                         if count <= retries:
310                             continue
311                     raise
312
313             content_html = more['content_html']
314             if not content_html.strip():
315                 # Some webpages show a "Load more" button but they don't
316                 # have more videos
317                 break
318             more_widget_html = more['load_more_widget_html']
319
320
321 class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
322     def _process_page(self, content):
323         for video_id, video_title in self.extract_videos_from_page(content):
324             yield self.url_result(video_id, 'Youtube', video_id, video_title)
325
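    # Collects (video_id, title) pairs into the two parallel lists, skipping
    # duplicate ids; when an id repeats, the first non-empty title seen for it
    # is kept.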
326     def extract_videos_from_page_impl(self, video_re, page, ids_in_page, titles_in_page):
327         for mobj in re.finditer(video_re, page):
328             # The link with index 0 is not the first video of the playlist (not sure if this is still the case)
329             if 'index' in mobj.groupdict() and mobj.group('id') == '0':
330                 continue
331             video_id = mobj.group('id')
332             video_title = unescapeHTML(
333                 mobj.group('title')) if 'title' in mobj.groupdict() else None
334             if video_title:
335                 video_title = video_title.strip()
336             if video_title == '► Play all':
337                 video_title = None
338             try:
339                 idx = ids_in_page.index(video_id)
340                 if video_title and not titles_in_page[idx]:
341                     titles_in_page[idx] = video_title
342             except ValueError:
343                 ids_in_page.append(video_id)
344                 titles_in_page.append(video_title)
345
346     def extract_videos_from_page(self, page):
347         ids_in_page = []
348         titles_in_page = []
349         self.extract_videos_from_page_impl(
350             self._VIDEO_RE, page, ids_in_page, titles_in_page)
351         return zip(ids_in_page, titles_in_page)
352
353
354 class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
355     def _process_page(self, content):
356         for playlist_id in orderedSet(re.findall(
357                 r'<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*><a[^>]+href="/?playlist\?list=([0-9A-Za-z-_]{10,})"',
358                 content)):
359             yield self.url_result(
360                 'https://www.youtube.com/playlist?list=%s' % playlist_id, 'YoutubePlaylist')
361
362     def _real_extract(self, url):
363         playlist_id = self._match_id(url)
364         webpage = self._download_webpage(url, playlist_id)
365         title = self._og_search_title(webpage, fatal=False)
366         return self.playlist_result(self._entries(webpage, playlist_id), playlist_id, title)
367
368
369 class YoutubeIE(YoutubeBaseInfoExtractor):
370     IE_DESC = 'YouTube.com'
371     _VALID_URL = r"""(?x)^
372                      (
373                          (?:https?://|//)                                    # http(s):// or protocol-independent URL
374                          (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com/|
375                             (?:www\.)?deturl\.com/www\.youtube\.com/|
376                             (?:www\.)?pwnyoutube\.com/|
377                             (?:www\.)?hooktube\.com/|
378                             (?:www\.)?yourepeat\.com/|
379                             tube\.majestyc\.net/|
380                             # Invidious instances taken from https://github.com/omarroth/invidious/wiki/Invidious-Instances
381                             (?:(?:www|dev)\.)?invidio\.us/|
382                             (?:(?:www|no)\.)?invidiou\.sh/|
383                             (?:(?:www|fi|de)\.)?invidious\.snopyta\.org/|
384                             (?:www\.)?invidious\.kabi\.tk/|
385                             (?:www\.)?invidious\.13ad\.de/|
386                             (?:www\.)?invidious\.mastodon\.host/|
387                             (?:www\.)?invidious\.nixnet\.xyz/|
388                             (?:www\.)?invidious\.drycat\.fr/|
389                             (?:www\.)?tube\.poal\.co/|
390                             (?:www\.)?vid\.wxzm\.sx/|
391                             (?:www\.)?yt\.elukerio\.org/|
392                             (?:www\.)?yt\.lelux\.fi/|
393                             (?:www\.)?kgg2m7yk5aybusll\.onion/|
394                             (?:www\.)?qklhadlycap4cnod\.onion/|
395                             (?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion/|
396                             (?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion/|
397                             (?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion/|
398                             (?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion/|
399                             (?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p/|
400                             youtube\.googleapis\.com/)                        # the various hostnames, with wildcard subdomains
401                          (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
402                          (?:                                                  # the various things that can precede the ID:
403                              (?:(?:v|embed|e)/(?!videoseries))                # v/ or embed/ or e/
404                              |(?:                                             # or the v= param in all its forms
405                                  (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)?  # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
406                                  (?:\?|\#!?)                                  # the params delimiter ? or # or #!
407                                  (?:.*?[&;])??                                # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
408                                  v=
409                              )
410                          ))
411                          |(?:
412                             youtu\.be|                                        # just youtu.be/xxxx
413                             vid\.plus|                                        # or vid.plus/xxxx
414                             zwearz\.com/watch|                                # or zwearz.com/watch/xxxx
415                          )/
416                          |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
417                          )
418                      )?                                                       # all until now is optional -> you can pass the naked ID
419                      ([0-9A-Za-z_-]{11})                                      # here it is! the YouTube video ID
420                      (?!.*?\blist=
421                         (?:
422                             %(playlist_id)s|                                  # combined list/video URLs are handled by the playlist IE
423                             WL                                                # WL are handled by the watch later IE
424                         )
425                      )
426                      (?(1).+)?                                                # if we found the ID, everything can follow
427                      $""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
428     _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
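    # Patterns used to pull the player id (and extension) out of the player
    # URL. Illustrative, assumed examples: a URL ending in
    # '.../abcd1234/player_ias.vflset/en_US/base.js' yields id 'abcd1234',
    # while an older '...-vflXYZ123.js' style URL yields the 'vfl...' token.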
429     _PLAYER_INFO_RE = (
430         r'/(?P<id>[a-zA-Z0-9_-]{8,})/player_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?/base\.(?P<ext>[a-z]+)$',
431         r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.(?P<ext>[a-z]+)$',
432     )
433     _formats = {
434         '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
435         '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
436         '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
437         '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
438         '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
439         '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
440         '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
441         '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
442         # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
443         '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
444         '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
445         '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
446         '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
447         '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
448         '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
449         '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
450         '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
451         '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
452
453
454         # 3D videos
455         '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
456         '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
457         '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
458         '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
459         '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
460         '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
461         '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
462
463         # Apple HTTP Live Streaming
464         '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
465         '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
466         '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
467         '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
468         '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
469         '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
470         '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
471         '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
472
473         # DASH mp4 video
474         '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
475         '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
476         '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
477         '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
478         '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
479         '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'},  # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
480         '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
481         '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
482         '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
483         '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
484         '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
485         '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
486
487         # DASH mp4 audio
488         '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
489         '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
490         '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
491         '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
492         '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
493         '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
494         '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
495
496         # DASH webm
497         '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
498         '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
499         '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
500         '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
501         '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
502         '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
503         '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
504         '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
505         '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
506         '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
507         '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
508         '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
509         '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
510         '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
511         '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
512         # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
513         '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
514         '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
515         '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
516         '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
517         '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
518         '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
519
520         # DASH webm audio
521         '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
522         '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
523
524         # DASH webm audio with opus inside
525         '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
526         '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
527         '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
528
529         # RTMP (unnamed)
530         '_rtmp': {'protocol': 'rtmp'},
531
532         # av01 video-only formats sometimes served with "unknown" codecs
533         '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
534         '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
535         '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
536         '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
537     }
538     _SUBTITLE_FORMATS = ('srv1', 'srv2', 'srv3', 'ttml', 'vtt')
539
540     _GEO_BYPASS = False
541
542     IE_NAME = 'youtube'
543     _TESTS = [
544         {
545             'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
546             'info_dict': {
547                 'id': 'BaW_jenozKc',
548                 'ext': 'mp4',
549                 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
550                 'uploader': 'Philipp Hagemeister',
551                 'uploader_id': 'phihag',
552                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
553                 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
554                 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
555                 'upload_date': '20121002',
556                 'description': 'test chars:  "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
557                 'categories': ['Science & Technology'],
558                 'tags': ['youtube-dl'],
559                 'duration': 10,
560                 'view_count': int,
561                 'like_count': int,
562                 'dislike_count': int,
563                 'start_time': 1,
564                 'end_time': 9,
565             }
566         },
567         {
568             'url': 'https://www.youtube.com/watch?v=UxxajLWwzqY',
569             'note': 'Test generic use_cipher_signature video (#897)',
570             'info_dict': {
571                 'id': 'UxxajLWwzqY',
572                 'ext': 'mp4',
573                 'upload_date': '20120506',
574                 'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
575                 'alt_title': 'I Love It (feat. Charli XCX)',
576                 'description': 'md5:19a2f98d9032b9311e686ed039564f63',
577                 'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
578                          'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
579                          'iconic ep', 'iconic', 'love', 'it'],
580                 'duration': 180,
581                 'uploader': 'Icona Pop',
582                 'uploader_id': 'IconaPop',
583                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IconaPop',
584                 'creator': 'Icona Pop',
585                 'track': 'I Love It (feat. Charli XCX)',
586                 'artist': 'Icona Pop',
587             }
588         },
589         {
590             'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ',
591             'note': 'Test VEVO video with age protection (#956)',
592             'info_dict': {
593                 'id': '07FYdnEawAQ',
594                 'ext': 'mp4',
595                 'upload_date': '20130703',
596                 'title': 'Justin Timberlake - Tunnel Vision (Official Music Video) (Explicit)',
597                 'alt_title': 'Tunnel Vision',
598                 'description': 'md5:07dab3356cde4199048e4c7cd93471e1',
599                 'duration': 419,
600                 'uploader': 'justintimberlakeVEVO',
601                 'uploader_id': 'justintimberlakeVEVO',
602                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/justintimberlakeVEVO',
603                 'creator': 'Justin Timberlake',
604                 'track': 'Tunnel Vision',
605                 'artist': 'Justin Timberlake',
606                 'age_limit': 18,
607             }
608         },
609         {
610             'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
611             'note': 'Embed-only video (#1746)',
612             'info_dict': {
613                 'id': 'yZIXLfi8CZQ',
614                 'ext': 'mp4',
615                 'upload_date': '20120608',
616                 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
617                 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
618                 'uploader': 'SET India',
619                 'uploader_id': 'setindia',
620                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
621                 'age_limit': 18,
622             }
623         },
624         {
625             'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=UxxajLWwzqY',
626             'note': 'Use the first video ID in the URL',
627             'info_dict': {
628                 'id': 'BaW_jenozKc',
629                 'ext': 'mp4',
630                 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
631                 'uploader': 'Philipp Hagemeister',
632                 'uploader_id': 'phihag',
633                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
634                 'upload_date': '20121002',
635                 'description': 'test chars:  "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
636                 'categories': ['Science & Technology'],
637                 'tags': ['youtube-dl'],
638                 'duration': 10,
639                 'view_count': int,
640                 'like_count': int,
641                 'dislike_count': int,
642             },
643             'params': {
644                 'skip_download': True,
645             },
646         },
647         {
648             'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
649             'note': '256k DASH audio (format 141) via DASH manifest',
650             'info_dict': {
651                 'id': 'a9LDPn-MO4I',
652                 'ext': 'm4a',
653                 'upload_date': '20121002',
654                 'uploader_id': '8KVIDEO',
655                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
656                 'description': '',
657                 'uploader': '8KVIDEO',
658                 'title': 'UHDTV TEST 8K VIDEO.mp4'
659             },
660             'params': {
661                 'youtube_include_dash_manifest': True,
662                 'format': '141',
663             },
664             'skip': 'format 141 not served anymore',
665         },
666         # DASH manifest with encrypted signature
667         {
668             'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
669             'info_dict': {
670                 'id': 'IB3lcPjvWLA',
671                 'ext': 'm4a',
672                 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
673                 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
674                 'duration': 244,
675                 'uploader': 'AfrojackVEVO',
676                 'uploader_id': 'AfrojackVEVO',
677                 'upload_date': '20131011',
678             },
679             'params': {
680                 'youtube_include_dash_manifest': True,
681                 'format': '141/bestaudio[ext=m4a]',
682             },
683         },
684         # JS player signature function name containing $
685         {
686             'url': 'https://www.youtube.com/watch?v=nfWlot6h_JM',
687             'info_dict': {
688                 'id': 'nfWlot6h_JM',
689                 'ext': 'm4a',
690                 'title': 'Taylor Swift - Shake It Off',
691                 'description': 'md5:307195cd21ff7fa352270fe884570ef0',
692                 'duration': 242,
693                 'uploader': 'TaylorSwiftVEVO',
694                 'uploader_id': 'TaylorSwiftVEVO',
695                 'upload_date': '20140818',
696             },
697             'params': {
698                 'youtube_include_dash_manifest': True,
699                 'format': '141/bestaudio[ext=m4a]',
700             },
701         },
702         # Controversy video
703         {
704             'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
705             'info_dict': {
706                 'id': 'T4XJQO3qol8',
707                 'ext': 'mp4',
708                 'duration': 219,
709                 'upload_date': '20100909',
710                 'uploader': 'Amazing Atheist',
711                 'uploader_id': 'TheAmazingAtheist',
712                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
713                 'title': 'Burning Everyone\'s Koran',
714                 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
715             }
716         },
717         # Normal age-gate video (No vevo, embed allowed)
718         {
719             'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
720             'info_dict': {
721                 'id': 'HtVdAasjOgU',
722                 'ext': 'mp4',
723                 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
724                 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
725                 'duration': 142,
726                 'uploader': 'The Witcher',
727                 'uploader_id': 'WitcherGame',
728                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
729                 'upload_date': '20140605',
730                 'age_limit': 18,
731             },
732         },
733         # Age-gate video with encrypted signature
734         {
735             'url': 'https://www.youtube.com/watch?v=6kLq3WMV1nU',
736             'info_dict': {
737                 'id': '6kLq3WMV1nU',
738                 'ext': 'mp4',
739                 'title': 'Dedication To My Ex (Miss That) (Lyric Video)',
740                 'description': 'md5:33765bb339e1b47e7e72b5490139bb41',
741                 'duration': 246,
742                 'uploader': 'LloydVEVO',
743                 'uploader_id': 'LloydVEVO',
744                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/LloydVEVO',
745                 'upload_date': '20110629',
746                 'age_limit': 18,
747             },
748         },
749         # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
750         # YouTube Red ad is not captured for creator
751         {
752             'url': '__2ABJjxzNo',
753             'info_dict': {
754                 'id': '__2ABJjxzNo',
755                 'ext': 'mp4',
756                 'duration': 266,
757                 'upload_date': '20100430',
758                 'uploader_id': 'deadmau5',
759                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
760                 'creator': 'Dada Life, deadmau5',
761                 'description': 'md5:12c56784b8032162bb936a5f76d55360',
762                 'uploader': 'deadmau5',
763                 'title': 'Deadmau5 - Some Chords (HD)',
764                 'alt_title': 'This Machine Kills Some Chords',
765             },
766             'expected_warnings': [
767                 'DASH manifest missing',
768             ]
769         },
770         # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
771         {
772             'url': 'lqQg6PlCWgI',
773             'info_dict': {
774                 'id': 'lqQg6PlCWgI',
775                 'ext': 'mp4',
776                 'duration': 6085,
777                 'upload_date': '20150827',
778                 'uploader_id': 'olympic',
779                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
780                 'description': 'HO09  - Women -  GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
781                 'uploader': 'Olympic',
782                 'title': 'Hockey - Women -  GER-AUS - London 2012 Olympic Games',
783             },
784             'params': {
785                 'skip_download': 'requires avconv',
786             }
787         },
788         # Non-square pixels
789         {
790             'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
791             'info_dict': {
792                 'id': '_b-2C3KPAM0',
793                 'ext': 'mp4',
794                 'stretched_ratio': 16 / 9.,
795                 'duration': 85,
796                 'upload_date': '20110310',
797                 'uploader_id': 'AllenMeow',
798                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
799                 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
800                 'uploader': '孫ᄋᄅ',
801                 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
802             },
803         },
804         # url_encoded_fmt_stream_map is empty string
805         {
806             'url': 'qEJwOuvDf7I',
807             'info_dict': {
808                 'id': 'qEJwOuvDf7I',
809                 'ext': 'webm',
810                 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
811                 'description': '',
812                 'upload_date': '20150404',
813                 'uploader_id': 'spbelect',
814                 'uploader': 'Наблюдатели Петербурга',
815             },
816             'params': {
817                 'skip_download': 'requires avconv',
818             },
819             'skip': 'This live event has ended.',
820         },
821         # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
822         {
823             'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
824             'info_dict': {
825                 'id': 'FIl7x6_3R5Y',
826                 'ext': 'webm',
827                 'title': 'md5:7b81415841e02ecd4313668cde88737a',
828                 'description': 'md5:116377fd2963b81ec4ce64b542173306',
829                 'duration': 220,
830                 'upload_date': '20150625',
831                 'uploader_id': 'dorappi2000',
832                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
833                 'uploader': 'dorappi2000',
834                 'formats': 'mincount:31',
835             },
836             'skip': 'no longer applicable',
837         },
838         # DASH manifest with segment_list
839         {
840             'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
841             'md5': '8ce563a1d667b599d21064e982ab9e31',
842             'info_dict': {
843                 'id': 'CsmdDsKjzN8',
844                 'ext': 'mp4',
845                 'upload_date': '20150501',  # According to '<meta itemprop="datePublished"', but in other places it's 20150510
846                 'uploader': 'Airtek',
847                 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
848                 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
849                 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
850             },
851             'params': {
852                 'youtube_include_dash_manifest': True,
853                 'format': '135',  # bestvideo
854             },
855             'skip': 'This live event has ended.',
856         },
857         {
858             # Multifeed videos (multiple cameras), URL is for Main Camera
859             'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
860             'info_dict': {
861                 'id': 'jqWvoWXjCVs',
862                 'title': 'teamPGP: Rocket League Noob Stream',
863                 'description': 'md5:dc7872fb300e143831327f1bae3af010',
864             },
865             'playlist': [{
866                 'info_dict': {
867                     'id': 'jqWvoWXjCVs',
868                     'ext': 'mp4',
869                     'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
870                     'description': 'md5:dc7872fb300e143831327f1bae3af010',
871                     'duration': 7335,
872                     'upload_date': '20150721',
873                     'uploader': 'Beer Games Beer',
874                     'uploader_id': 'beergamesbeer',
875                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
876                     'license': 'Standard YouTube License',
877                 },
878             }, {
879                 'info_dict': {
880                     'id': '6h8e8xoXJzg',
881                     'ext': 'mp4',
882                     'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
883                     'description': 'md5:dc7872fb300e143831327f1bae3af010',
884                     'duration': 7337,
885                     'upload_date': '20150721',
886                     'uploader': 'Beer Games Beer',
887                     'uploader_id': 'beergamesbeer',
888                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
889                     'license': 'Standard YouTube License',
890                 },
891             }, {
892                 'info_dict': {
893                     'id': 'PUOgX5z9xZw',
894                     'ext': 'mp4',
895                     'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
896                     'description': 'md5:dc7872fb300e143831327f1bae3af010',
897                     'duration': 7337,
898                     'upload_date': '20150721',
899                     'uploader': 'Beer Games Beer',
900                     'uploader_id': 'beergamesbeer',
901                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
902                     'license': 'Standard YouTube License',
903                 },
904             }, {
905                 'info_dict': {
906                     'id': 'teuwxikvS5k',
907                     'ext': 'mp4',
908                     'title': 'teamPGP: Rocket League Noob Stream (zim)',
909                     'description': 'md5:dc7872fb300e143831327f1bae3af010',
910                     'duration': 7334,
911                     'upload_date': '20150721',
912                     'uploader': 'Beer Games Beer',
913                     'uploader_id': 'beergamesbeer',
914                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
915                     'license': 'Standard YouTube License',
916                 },
917             }],
918             'params': {
919                 'skip_download': True,
920             },
921             'skip': 'This video is not available.',
922         },
923         {
924             # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
925             'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
926             'info_dict': {
927                 'id': 'gVfLd0zydlo',
928                 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
929             },
930             'playlist_count': 2,
931             'skip': 'Not multifeed anymore',
932         },
933         {
934             'url': 'https://vid.plus/FlRa-iH7PGw',
935             'only_matching': True,
936         },
937         {
938             'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
939             'only_matching': True,
940         },
941         {
942             # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
943             # Also tests cut-off URL expansion in video description (see
944             # https://github.com/ytdl-org/youtube-dl/issues/1892,
945             # https://github.com/ytdl-org/youtube-dl/issues/8164)
946             'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
947             'info_dict': {
948                 'id': 'lsguqyKfVQg',
949                 'ext': 'mp4',
950                 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
951                 'alt_title': 'Dark Walk - Position Music',
952                 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
953                 'duration': 133,
954                 'upload_date': '20151119',
955                 'uploader_id': 'IronSoulElf',
956                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
957                 'uploader': 'IronSoulElf',
958                 'creator': 'Todd Haberman,  Daniel Law Heath and Aaron Kaplan',
959                 'track': 'Dark Walk - Position Music',
960                 'artist': 'Todd Haberman,  Daniel Law Heath and Aaron Kaplan',
961                 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
962             },
963             'params': {
964                 'skip_download': True,
965             },
966         },
967         {
968             # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
969             'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
970             'only_matching': True,
971         },
972         {
973             # Video with yt:stretch=17:0
974             'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
975             'info_dict': {
976                 'id': 'Q39EVAstoRM',
977                 'ext': 'mp4',
978                 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
979                 'description': 'md5:ee18a25c350637c8faff806845bddee9',
980                 'upload_date': '20151107',
981                 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
982                 'uploader': 'CH GAMER DROID',
983             },
984             'params': {
985                 'skip_download': True,
986             },
987             'skip': 'This video does not exist.',
988         },
989         {
990             # Video licensed under Creative Commons
991             'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
992             'info_dict': {
993                 'id': 'M4gD1WSo5mA',
994                 'ext': 'mp4',
995                 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
996                 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
997                 'duration': 721,
998                 'upload_date': '20150127',
999                 'uploader_id': 'BerkmanCenter',
1000                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
1001                 'uploader': 'The Berkman Klein Center for Internet & Society',
1002                 'license': 'Creative Commons Attribution license (reuse allowed)',
1003             },
1004             'params': {
1005                 'skip_download': True,
1006             },
1007         },
1008         {
1009             # Channel-like uploader_url
1010             'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1011             'info_dict': {
1012                 'id': 'eQcmzGIKrzg',
1013                 'ext': 'mp4',
1014                 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
1015                 'description': 'md5:dda0d780d5a6e120758d1711d062a867',
1016                 'duration': 4060,
1017                 'upload_date': '20151119',
1018                 'uploader': 'Bernie Sanders',
1019                 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1020                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
1021                 'license': 'Creative Commons Attribution license (reuse allowed)',
1022             },
1023             'params': {
1024                 'skip_download': True,
1025             },
1026         },
1027         {
1028             'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1029             'only_matching': True,
1030         },
1031         {
1032             # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
1033             'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1034             'only_matching': True,
1035         },
1036         {
1037             # Rental video preview
1038             'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1039             'info_dict': {
1040                 'id': 'uGpuVWrhIzE',
1041                 'ext': 'mp4',
1042                 'title': 'Piku - Trailer',
1043                 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1044                 'upload_date': '20150811',
1045                 'uploader': 'FlixMatrix',
1046                 'uploader_id': 'FlixMatrixKaravan',
1047                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
1048                 'license': 'Standard YouTube License',
1049             },
1050             'params': {
1051                 'skip_download': True,
1052             },
1053             'skip': 'This video is not available.',
1054         },
1055         {
1056             # YouTube Red video with episode data
1057             'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1058             'info_dict': {
1059                 'id': 'iqKdEhx-dD4',
1060                 'ext': 'mp4',
1061                 'title': 'Isolation - Mind Field (Ep 1)',
1062                 'description': 'md5:46a29be4ceffa65b92d277b93f463c0f',
1063                 'duration': 2085,
1064                 'upload_date': '20170118',
1065                 'uploader': 'Vsauce',
1066                 'uploader_id': 'Vsauce',
1067                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
1068                 'series': 'Mind Field',
1069                 'season_number': 1,
1070                 'episode_number': 1,
1071             },
1072             'params': {
1073                 'skip_download': True,
1074             },
1075             'expected_warnings': [
1076                 'Skipping DASH manifest',
1077             ],
1078         },
1079         {
1080             # The following content has been identified by the YouTube community
1081             # as inappropriate or offensive to some audiences.
1082             'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1083             'info_dict': {
1084                 'id': '6SJNVb0GnPI',
1085                 'ext': 'mp4',
1086                 'title': 'Race Differences in Intelligence',
1087                 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1088                 'duration': 965,
1089                 'upload_date': '20140124',
1090                 'uploader': 'New Century Foundation',
1091                 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1092                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1093             },
1094             'params': {
1095                 'skip_download': True,
1096             },
1097         },
1098         {
1099             # itag 212
1100             'url': '1t24XAntNCY',
1101             'only_matching': True,
1102         },
1103         {
1104             # geo restricted to JP
1105             'url': 'sJL6WA-aGkQ',
1106             'only_matching': True,
1107         },
1108         {
1109             'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
1110             'only_matching': True,
1111         },
1112         {
1113             'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1114             'only_matching': True,
1115         },
1116         {
1117             # DRM protected
1118             'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1119             'only_matching': True,
1120         },
1121         {
1122             # Video with unsupported adaptive stream type formats
1123             'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1124             'info_dict': {
1125                 'id': 'Z4Vy8R84T1U',
1126                 'ext': 'mp4',
1127                 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1128                 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1129                 'duration': 433,
1130                 'upload_date': '20130923',
1131                 'uploader': 'Amelia Putri Harwita',
1132                 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1133                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1134                 'formats': 'maxcount:10',
1135             },
1136             'params': {
1137                 'skip_download': True,
1138                 'youtube_include_dash_manifest': False,
1139             },
1140             'skip': 'no longer relevant',
1141         },
1142         {
1143             # Youtube Music Auto-generated description
1144             'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1145             'info_dict': {
1146                 'id': 'MgNrAu2pzNs',
1147                 'ext': 'mp4',
1148                 'title': 'Voyeur Girl',
1149                 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1150                 'upload_date': '20190312',
1151                 'uploader': 'Stephen - Topic',
1152                 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1153                 'artist': 'Stephen',
1154                 'track': 'Voyeur Girl',
1155                 'album': 'it\'s too much love to know my dear',
1156                 'release_date': '20190313',
1157                 'release_year': 2019,
1158             },
1159             'params': {
1160                 'skip_download': True,
1161             },
1162         },
1163         {
1164             # Youtube Music Auto-generated description
1165             # Retrieve 'artist' field from 'Artist:' in video description
1166             # when it is present on youtube music video
1167             'url': 'https://www.youtube.com/watch?v=k0jLE7tTwjY',
1168             'info_dict': {
1169                 'id': 'k0jLE7tTwjY',
1170                 'ext': 'mp4',
1171                 'title': 'Latch Feat. Sam Smith',
1172                 'description': 'md5:3cb1e8101a7c85fcba9b4fb41b951335',
1173                 'upload_date': '20150110',
1174                 'uploader': 'Various Artists - Topic',
1175                 'uploader_id': 'UCNkEcmYdjrH4RqtNgh7BZ9w',
1176                 'artist': 'Disclosure',
1177                 'track': 'Latch Feat. Sam Smith',
1178                 'album': 'Latch Featuring Sam Smith',
1179                 'release_date': '20121008',
1180                 'release_year': 2012,
1181             },
1182             'params': {
1183                 'skip_download': True,
1184             },
1185         },
1186         {
1187             # Youtube Music Auto-generated description
1188             # handle multiple artists on youtube music video
1189             'url': 'https://www.youtube.com/watch?v=74qn0eJSjpA',
1190             'info_dict': {
1191                 'id': '74qn0eJSjpA',
1192                 'ext': 'mp4',
1193                 'title': 'Eastside',
1194                 'description': 'md5:290516bb73dcbfab0dcc4efe6c3de5f2',
1195                 'upload_date': '20180710',
1196                 'uploader': 'Benny Blanco - Topic',
1197                 'uploader_id': 'UCzqz_ksRu_WkIzmivMdIS7A',
1198                 'artist': 'benny blanco, Halsey, Khalid',
1199                 'track': 'Eastside',
1200                 'album': 'Eastside',
1201                 'release_date': '20180713',
1202                 'release_year': 2018,
1203             },
1204             'params': {
1205                 'skip_download': True,
1206             },
1207         },
1208         {
1209             # Youtube Music Auto-generated description
1210             # handle youtube music video with release_year and no release_date
1211             'url': 'https://www.youtube.com/watch?v=-hcAI0g-f5M',
1212             'info_dict': {
1213                 'id': '-hcAI0g-f5M',
1214                 'ext': 'mp4',
1215                 'title': 'Put It On Me',
1216                 'description': 'md5:f6422397c07c4c907c6638e1fee380a5',
1217                 'upload_date': '20180426',
1218                 'uploader': 'Matt Maeson - Topic',
1219                 'uploader_id': 'UCnEkIGqtGcQMLk73Kp-Q5LQ',
1220                 'artist': 'Matt Maeson',
1221                 'track': 'Put It On Me',
1222                 'album': 'The Hearse',
1223                 'release_date': None,
1224                 'release_year': 2018,
1225             },
1226             'params': {
1227                 'skip_download': True,
1228             },
1229         },
1230         {
1231             'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1232             'only_matching': True,
1233         },
1234         {
1235             # invalid -> valid video id redirection
1236             'url': 'DJztXj2GPfl',
1237             'info_dict': {
1238                 'id': 'DJztXj2GPfk',
1239                 'ext': 'mp4',
1240                 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1241                 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1242                 'upload_date': '20090125',
1243                 'uploader': 'Prochorowka',
1244                 'uploader_id': 'Prochorowka',
1245                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1246                 'artist': 'Panjabi MC',
1247                 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1248                 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1249             },
1250             'params': {
1251                 'skip_download': True,
1252             },
1253         }
1254     ]
1255
1256     def __init__(self, *args, **kwargs):
1257         super(YoutubeIE, self).__init__(*args, **kwargs)
1258         self._player_cache = {}
1259
1260     def report_video_info_webpage_download(self, video_id):
1261         """Report attempt to download video info webpage."""
1262         self.to_screen('%s: Downloading video info webpage' % video_id)
1263
1264     def report_information_extraction(self, video_id):
1265         """Report attempt to extract video information."""
1266         self.to_screen('%s: Extracting video information' % video_id)
1267
1268     def report_unavailable_format(self, video_id, format):
1269         """Report extracted video URL."""
1270         self.to_screen('%s: Format %s not available' % (video_id, format))
1271
1272     def report_rtmp_download(self):
1273         """Indicate the download will use the RTMP protocol."""
1274         self.to_screen('RTMP download detected')
1275
1276     def _signature_cache_id(self, example_sig):
1277         """ Return a string representation of a signature """
1278         return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
1279
1280     @classmethod
1281     def _extract_player_info(cls, player_url):
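             # Try each pattern in _PLAYER_INFO_RE against the player URL; the regex's
             # 'ext' group ('js' or 'swf') doubles as the player type and 'id' is the player id.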
1282         for player_re in cls._PLAYER_INFO_RE:
1283             id_m = re.search(player_re, player_url)
1284             if id_m:
1285                 break
1286         else:
1287             raise ExtractorError('Cannot identify player %r' % player_url)
1288         return id_m.group('ext'), id_m.group('id')
1289
1290     def _extract_signature_function(self, video_id, player_url, example_sig):
1291         player_type, player_id = self._extract_player_info(player_url)
1292
1293         # Read from filesystem cache
1294         func_id = '%s_%s_%s' % (
1295             player_type, player_id, self._signature_cache_id(example_sig))
1296         assert os.path.basename(func_id) == func_id
1297
1298         cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
1299         if cache_spec is not None:
1300             return lambda s: ''.join(s[i] for i in cache_spec)
1301
1302         download_note = (
1303             'Downloading player %s' % player_url
1304             if self._downloader.params.get('verbose') else
1305             'Downloading %s player %s' % (player_type, player_id)
1306         )
1307         if player_type == 'js':
1308             code = self._download_webpage(
1309                 player_url, video_id,
1310                 note=download_note,
1311                 errnote='Download of %s failed' % player_url)
1312             res = self._parse_sig_js(code)
1313         elif player_type == 'swf':
1314             urlh = self._request_webpage(
1315                 player_url, video_id,
1316                 note=download_note,
1317                 errnote='Download of %s failed' % player_url)
1318             code = urlh.read()
1319             res = self._parse_sig_swf(code)
1320         else:
1321             assert False, 'Invalid player type %r' % player_type
1322
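             # Run the deciphering function on a probe string of distinct characters;
             # the resulting character codes record which input index ends up at each
             # output position, so the cached spec can replay the transformation as a
             # simple index map (see the cache hit above) without re-downloading the player.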
1323         test_string = ''.join(map(compat_chr, range(len(example_sig))))
1324         cache_res = res(test_string)
1325         cache_spec = [ord(c) for c in cache_res]
1326
1327         self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
1328         return res
1329
1330     def _print_sig_code(self, func, example_sig):
1331         def gen_sig_code(idxs):
1332             def _genslice(start, end, step):
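                     # e.g. _genslice(3, 7, 2) -> 's[3:9:2]' (Python slice ends are exclusive, hence end + step)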
1333                 starts = '' if start == 0 else str(start)
1334                 ends = (':%d' % (end + step)) if end + step >= 0 else ':'
1335                 steps = '' if step == 1 else (':%d' % step)
1336                 return 's[%s%s%s]' % (starts, ends, steps)
1337
1338             step = None
1339             # Squelch pyflakes warnings - start will be set when step is set
1340             start = '(Never used)'
1341             for i, prev in zip(idxs[1:], idxs[:-1]):
1342                 if step is not None:
1343                     if i - prev == step:
1344                         continue
1345                     yield _genslice(start, prev, step)
1346                     step = None
1347                     continue
1348                 if i - prev in [-1, 1]:
1349                     step = i - prev
1350                     start = prev
1351                     continue
1352                 else:
1353                     yield 's[%d]' % prev
1354             if step is None:
1355                 yield 's[%d]' % i
1356             else:
1357                 yield _genslice(start, i, step)
1358
1359         test_string = ''.join(map(compat_chr, range(len(example_sig))))
1360         cache_res = func(test_string)
1361         cache_spec = [ord(c) for c in cache_res]
1362         expr_code = ' + '.join(gen_sig_code(cache_spec))
1363         signature_id_tuple = '(%s)' % (
1364             ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
1365         code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
1366                 '    return %s\n') % (signature_id_tuple, expr_code)
1367         self.to_screen('Extracted signature function:\n' + code)
1368
1369     def _parse_sig_js(self, jscode):
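             # Locate the name of the signature-transforming function in the player
             # JavaScript and wrap it with JSInterpreter into a Python callable.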
1370         funcname = self._search_regex(
1371             (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1372              r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1373              r'\b(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
1374              r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
1375              # Obsolete patterns
1376              r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1377              r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
1378              r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1379              r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1380              r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1381              r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1382              r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1383              r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
1384             jscode, 'Initial JS player signature function name', group='sig')
1385
1386         jsi = JSInterpreter(jscode)
1387         initial_function = jsi.extract_function(funcname)
1388         return lambda s: initial_function([s])
1389
1390     def _parse_sig_swf(self, file_contents):
1391         swfi = SWFInterpreter(file_contents)
1392         TARGET_CLASSNAME = 'SignatureDecipher'
1393         searched_class = swfi.extract_class(TARGET_CLASSNAME)
1394         initial_function = swfi.extract_function(searched_class, 'decipher')
1395         return lambda s: initial_function([s])
1396
1397     def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
1398         """Turn the encrypted s field into a working signature"""
1399
1400         if player_url is None:
1401             raise ExtractorError('Cannot decrypt signature without player_url')
1402
1403         if player_url.startswith('//'):
1404             player_url = 'https:' + player_url
1405         elif not re.match(r'https?://', player_url):
1406             player_url = compat_urlparse.urljoin(
1407                 'https://www.youtube.com', player_url)
1408         try:
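                 # The extracted function is cached per (player URL, signature length
                 # layout), so other formats of the same video reuse it.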
1409             player_id = (player_url, self._signature_cache_id(s))
1410             if player_id not in self._player_cache:
1411                 func = self._extract_signature_function(
1412                     video_id, player_url, s
1413                 )
1414                 self._player_cache[player_id] = func
1415             func = self._player_cache[player_id]
1416             if self._downloader.params.get('youtube_print_sig_code'):
1417                 self._print_sig_code(func, s)
1418             return func(s)
1419         except Exception as e:
1420             tb = traceback.format_exc()
1421             raise ExtractorError(
1422                 'Signature extraction failed: ' + tb, cause=e)
1423
1424     def _get_subtitles(self, video_id, webpage):
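             # Query the legacy timedtext 'list' endpoint and build, for every
             # available language, one entry per format in _SUBTITLE_FORMATS.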
1425         try:
1426             subs_doc = self._download_xml(
1427                 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
1428                 video_id, note=False)
1429         except ExtractorError as err:
1430             self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err))
1431             return {}
1432
1433         sub_lang_list = {}
1434         for track in subs_doc.findall('track'):
1435             lang = track.attrib['lang_code']
1436             if lang in sub_lang_list:
1437                 continue
1438             sub_formats = []
1439             for ext in self._SUBTITLE_FORMATS:
1440                 params = compat_urllib_parse_urlencode({
1441                     'lang': lang,
1442                     'v': video_id,
1443                     'fmt': ext,
1444                     'name': track.attrib['name'].encode('utf-8'),
1445                 })
1446                 sub_formats.append({
1447                     'url': 'https://www.youtube.com/api/timedtext?' + params,
1448                     'ext': ext,
1449                 })
1450             sub_lang_list[lang] = sub_formats
1451         if not sub_lang_list:
1452             self._downloader.report_warning('video doesn\'t have subtitles')
1453             return {}
1454         return sub_lang_list
1455
1456     def _get_ytplayer_config(self, video_id, webpage):
1457         patterns = (
1458             # User data may contain arbitrary character sequences that break
1459             # JSON extraction with regex, e.g. if the data contains '};' the
1460             # second regex won't capture the whole JSON. Work around this by
1461             # trying the more specific regex first, keeping in mind that proper
1462             # quoted string handling should eventually replace this workaround (see
1463             # https://github.com/ytdl-org/youtube-dl/issues/7468,
1464             # https://github.com/ytdl-org/youtube-dl/pull/7599)
1465             r';ytplayer\.config\s*=\s*({.+?});ytplayer',
1466             r';ytplayer\.config\s*=\s*({.+?});',
1467         )
1468         config = self._search_regex(
1469             patterns, webpage, 'ytplayer.config', default=None)
1470         if config:
1471             return self._parse_json(
1472                 uppercase_escape(config), video_id, fatal=False)
1473
1474     def _get_automatic_captions(self, video_id, webpage):
1475         """We need the webpage for getting the captions url, pass it as an
1476            argument to speed up the process."""
1477         self.to_screen('%s: Looking for automatic captions' % video_id)
1478         player_config = self._get_ytplayer_config(video_id, webpage)
1479         err_msg = 'Couldn\'t find automatic captions for %s' % video_id
1480         if not player_config:
1481             self._downloader.report_warning(err_msg)
1482             return {}
1483         try:
1484             args = player_config['args']
1485             caption_url = args.get('ttsurl')
1486             if caption_url:
1487                 timestamp = args['timestamp']
1488                 # We get the available subtitles
1489                 list_params = compat_urllib_parse_urlencode({
1490                     'type': 'list',
1491                     'tlangs': 1,
1492                     'asrs': 1,
1493                 })
1494                 list_url = caption_url + '&' + list_params
1495                 caption_list = self._download_xml(list_url, video_id)
1496                 original_lang_node = caption_list.find('track')
1497                 if original_lang_node is None:
1498                     self._downloader.report_warning('Video doesn\'t have automatic captions')
1499                     return {}
1500                 original_lang = original_lang_node.attrib['lang_code']
1501                 caption_kind = original_lang_node.attrib.get('kind', '')
1502
1503                 sub_lang_list = {}
1504                 for lang_node in caption_list.findall('target'):
1505                     sub_lang = lang_node.attrib['lang_code']
1506                     sub_formats = []
1507                     for ext in self._SUBTITLE_FORMATS:
1508                         params = compat_urllib_parse_urlencode({
1509                             'lang': original_lang,
1510                             'tlang': sub_lang,
1511                             'fmt': ext,
1512                             'ts': timestamp,
1513                             'kind': caption_kind,
1514                         })
1515                         sub_formats.append({
1516                             'url': caption_url + '&' + params,
1517                             'ext': ext,
1518                         })
1519                     sub_lang_list[sub_lang] = sub_formats
1520                 return sub_lang_list
1521
1522             def make_captions(sub_url, sub_langs):
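                     # Rebuild the caption URL for every requested language and subtitle
                     # format by rewriting its tlang/fmt query parameters.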
1523                 parsed_sub_url = compat_urllib_parse_urlparse(sub_url)
1524                 caption_qs = compat_parse_qs(parsed_sub_url.query)
1525                 captions = {}
1526                 for sub_lang in sub_langs:
1527                     sub_formats = []
1528                     for ext in self._SUBTITLE_FORMATS:
1529                         caption_qs.update({
1530                             'tlang': [sub_lang],
1531                             'fmt': [ext],
1532                         })
1533                         sub_url = compat_urlparse.urlunparse(parsed_sub_url._replace(
1534                             query=compat_urllib_parse_urlencode(caption_qs, True)))
1535                         sub_formats.append({
1536                             'url': sub_url,
1537                             'ext': ext,
1538                         })
1539                     captions[sub_lang] = sub_formats
1540                 return captions
1541
1542             # New captions format as of 22.06.2017
1543             player_response = args.get('player_response')
1544             if player_response and isinstance(player_response, compat_str):
1545                 player_response = self._parse_json(
1546                     player_response, video_id, fatal=False)
1547                 if player_response:
1548                     renderer = player_response['captions']['playerCaptionsTracklistRenderer']
1549                     base_url = renderer['captionTracks'][0]['baseUrl']
1550                     sub_lang_list = []
1551                     for lang in renderer['translationLanguages']:
1552                         lang_code = lang.get('languageCode')
1553                         if lang_code:
1554                             sub_lang_list.append(lang_code)
1555                     return make_captions(base_url, sub_lang_list)
1556
1557             # Some videos don't provide ttsurl but rather caption_tracks and
1558             # caption_translation_languages (e.g. 20LmZk1hakA)
1559             # Not used anymore as of 22.06.2017
1560             caption_tracks = args['caption_tracks']
1561             caption_translation_languages = args['caption_translation_languages']
1562             caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
1563             sub_lang_list = []
1564             for lang in caption_translation_languages.split(','):
1565                 lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang))
1566                 sub_lang = lang_qs.get('lc', [None])[0]
1567                 if sub_lang:
1568                     sub_lang_list.append(sub_lang)
1569             return make_captions(caption_url, sub_lang_list)
1570         # An extractor error can be raised by the download process if there are
1571         # no automatic captions but there are subtitles
1572         except (KeyError, IndexError, ExtractorError):
1573             self._downloader.report_warning(err_msg)
1574             return {}
1575
1576     def _mark_watched(self, video_id, video_info, player_response):
1577         playback_url = url_or_none(try_get(
1578             player_response,
1579             lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']) or try_get(
1580             video_info, lambda x: x['videostats_playback_base_url'][0]))
1581         if not playback_url:
1582             return
1583         parsed_playback_url = compat_urlparse.urlparse(playback_url)
1584         qs = compat_urlparse.parse_qs(parsed_playback_url.query)
1585
1586         # cpn generation algorithm is reverse engineered from base.js.
1587         # In fact it works even with dummy cpn.
1588         CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
1589         cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
1590
1591         qs.update({
1592             'ver': ['2'],
1593             'cpn': [cpn],
1594         })
1595         playback_url = compat_urlparse.urlunparse(
1596             parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
1597
1598         self._download_webpage(
1599             playback_url, video_id, 'Marking watched',
1600             'Unable to mark watched', fatal=False)
1601
1602     @staticmethod
1603     def _extract_urls(webpage):
1604         # Embedded YouTube player
1605         entries = [
1606             unescapeHTML(mobj.group('url'))
1607             for mobj in re.finditer(r'''(?x)
1608             (?:
1609                 <iframe[^>]+?src=|
1610                 data-video-url=|
1611                 <embed[^>]+?src=|
1612                 embedSWF\(?:\s*|
1613                 <object[^>]+data=|
1614                 new\s+SWFObject\(
1615             )
1616             (["\'])
1617                 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1618                 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
1619             \1''', webpage)]
1620
1621         # lazyYT YouTube embed
1622         entries.extend(list(map(
1623             unescapeHTML,
1624             re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))
1625
1626         # Wordpress "YouTube Video Importer" plugin
1627         matches = re.findall(r'''(?x)<div[^>]+
1628             class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
1629             data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
1630         entries.extend(m[-1] for m in matches)
1631
1632         return entries
1633
1634     @staticmethod
1635     def _extract_url(webpage):
1636         urls = YoutubeIE._extract_urls(webpage)
1637         return urls[0] if urls else None
1638
1639     @classmethod
1640     def extract_id(cls, url):
1641         mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
1642         if mobj is None:
1643             raise ExtractorError('Invalid URL: %s' % url)
1644         video_id = mobj.group(2)
1645         return video_id
1646
1647     @staticmethod
1648     def _extract_chapters(description, duration):
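             # Parse chapter markers from old-style watch page descriptions, where each
             # chapter line links its timestamp via yt.www.watch.player.seekTo; returns a
             # list of {start_time, end_time, title} dicts, or None if nothing is found.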
1649         if not description:
1650             return None
1651         chapter_lines = re.findall(
1652             r'(?:^|<br\s*/>)([^<]*<a[^>]+onclick=["\']yt\.www\.watch\.player\.seekTo[^>]+>(\d{1,2}:\d{1,2}(?::\d{1,2})?)</a>[^>]*)(?=$|<br\s*/>)',
1653             description)
1654         if not chapter_lines:
1655             return None
1656         chapters = []
1657         for next_num, (chapter_line, time_point) in enumerate(
1658                 chapter_lines, start=1):
1659             start_time = parse_duration(time_point)
1660             if start_time is None:
1661                 continue
1662             if start_time > duration:
1663                 break
1664             end_time = (duration if next_num == len(chapter_lines)
1665                         else parse_duration(chapter_lines[next_num][1]))
1666             if end_time is None:
1667                 continue
1668             if end_time > duration:
1669                 end_time = duration
1670             if start_time > end_time:
1671                 break
1672             chapter_title = re.sub(
1673                 r'<a[^>]+>[^<]+</a>', '', chapter_line).strip(' \t-')
1674             chapter_title = re.sub(r'\s+', ' ', chapter_title)
1675             chapters.append({
1676                 'start_time': start_time,
1677                 'end_time': end_time,
1678                 'title': chapter_title,
1679             })
1680         return chapters
1681
1682     def _real_extract(self, url):
1683         url, smuggled_data = unsmuggle_url(url, {})
1684
1685         proto = (
1686             'http' if self._downloader.params.get('prefer_insecure', False)
1687             else 'https')
1688
1689         start_time = None
1690         end_time = None
1691         parsed_url = compat_urllib_parse_urlparse(url)
1692         for component in [parsed_url.fragment, parsed_url.query]:
1693             query = compat_parse_qs(component)
1694             if start_time is None and 't' in query:
1695                 start_time = parse_duration(query['t'][0])
1696             if start_time is None and 'start' in query:
1697                 start_time = parse_duration(query['start'][0])
1698             if end_time is None and 'end' in query:
1699                 end_time = parse_duration(query['end'][0])
1700
1701         # Extract the original video URL from a redirecting URL (e.g. age verification) using the next_url parameter
1702         mobj = re.search(self._NEXT_URL_RE, url)
1703         if mobj:
1704             url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
1705         video_id = self.extract_id(url)
1706
1707         # Get video webpage
1708         url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
1709         video_webpage, urlh = self._download_webpage_handle(url, video_id)
1710
1711         qs = compat_parse_qs(compat_urllib_parse_urlparse(urlh.geturl()).query)
1712         video_id = qs.get('v', [None])[0] or video_id
1713
1714         # Attempt to extract SWF player URL
1715         mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
1716         if mobj is not None:
1717             player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1718         else:
1719             player_url = None
1720
1721         dash_mpds = []
1722
1723         def add_dash_mpd(video_info):
1724             dash_mpd = video_info.get('dashmpd')
1725             if dash_mpd and dash_mpd[0] not in dash_mpds:
1726                 dash_mpds.append(dash_mpd[0])
1727
1728         def add_dash_mpd_pr(pl_response):
1729             dash_mpd = url_or_none(try_get(
1730                 pl_response, lambda x: x['streamingData']['dashManifestUrl'],
1731                 compat_str))
1732             if dash_mpd and dash_mpd not in dash_mpds:
1733                 dash_mpds.append(dash_mpd)
1734
1735         is_live = None
1736         view_count = None
1737
1738         def extract_view_count(v_info):
1739             return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
1740
1741         def extract_player_response(player_response, video_id):
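                 # Parse the player_response JSON string (if any), register its DASH
                 # manifest URL and return it as a dict.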
1742             pl_response = str_or_none(player_response)
1743             if not pl_response:
1744                 return
1745             pl_response = self._parse_json(pl_response, video_id, fatal=False)
1746             if isinstance(pl_response, dict):
1747                 add_dash_mpd_pr(pl_response)
1748                 return pl_response
1749
1750         player_response = {}
1751
1752         # Get video info
1753         video_info = {}
1754         embed_webpage = None
1755         if re.search(r'player-age-gate-content">', video_webpage) is not None:
1756             age_gate = True
1757             # We simulate the access to the video from www.youtube.com/v/{video_id};
1758             # this can be viewed without logging into YouTube
1759             url = proto + '://www.youtube.com/embed/%s' % video_id
1760             embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
1761             data = compat_urllib_parse_urlencode({
1762                 'video_id': video_id,
1763                 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1764                 'sts': self._search_regex(
1765                     r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
1766             })
1767             video_info_url = proto + '://www.youtube.com/get_video_info?' + data
1768             try:
1769                 video_info_webpage = self._download_webpage(
1770                     video_info_url, video_id,
1771                     note='Refetching age-gated info webpage',
1772                     errnote='unable to download video info webpage')
1773             except ExtractorError:
1774                 video_info_webpage = None
1775             if video_info_webpage:
1776                 video_info = compat_parse_qs(video_info_webpage)
1777                 pl_response = video_info.get('player_response', [None])[0]
1778                 player_response = extract_player_response(pl_response, video_id)
1779                 add_dash_mpd(video_info)
1780                 view_count = extract_view_count(video_info)
1781         else:
1782             age_gate = False
1783             # Try looking directly into the video webpage
1784             ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
1785             if ytplayer_config:
1786                 args = ytplayer_config['args']
1787                 if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
1788                     # Convert to the same format returned by compat_parse_qs
1789                     video_info = dict((k, [v]) for k, v in args.items())
1790                     add_dash_mpd(video_info)
1791                 # Rental video is not rented but a preview is available (e.g.
1792                 # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
1793                 # https://github.com/ytdl-org/youtube-dl/issues/10532)
1794                 if not video_info and args.get('ypc_vid'):
1795                     return self.url_result(
1796                         args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
1797                 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1798                     is_live = True
1799                 if not player_response:
1800                     player_response = extract_player_response(args.get('player_response'), video_id)
1801             if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
1802                 add_dash_mpd_pr(player_response)
1803
1804         def extract_unavailable_message():
1805             messages = []
1806             for tag, kind in (('h1', 'message'), ('div', 'submessage')):
1807                 msg = self._html_search_regex(
1808                     r'(?s)<{tag}[^>]+id=["\']unavailable-{kind}["\'][^>]*>(.+?)</{tag}>'.format(tag=tag, kind=kind),
1809                     video_webpage, 'unavailable %s' % kind, default=None)
1810                 if msg:
1811                     messages.append(msg)
1812             if messages:
1813                 return '\n'.join(messages)
1814
1815         if not video_info and not player_response:
1816             unavailable_message = extract_unavailable_message()
1817             if not unavailable_message:
1818                 unavailable_message = 'Unable to extract video data'
1819             raise ExtractorError(
1820                 'YouTube said: %s' % unavailable_message, expected=True, video_id=video_id)
1821
1822         if not isinstance(video_info, dict):
1823             video_info = {}
1824
1825         video_details = try_get(
1826             player_response, lambda x: x['videoDetails'], dict) or {}
1827
1828         video_title = video_info.get('title', [None])[0] or video_details.get('title')
1829         if not video_title:
1830             self._downloader.report_warning('Unable to extract video title')
1831             video_title = '_'
1832
1833         description_original = video_description = get_element_by_id("eow-description", video_webpage)
1834         if video_description:
1835
1836             def replace_url(m):
1837                 redir_url = compat_urlparse.urljoin(url, m.group(1))
1838                 parsed_redir_url = compat_urllib_parse_urlparse(redir_url)
1839                 if re.search(r'^(?:www\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)$', parsed_redir_url.netloc) and parsed_redir_url.path == '/redirect':
1840                     qs = compat_parse_qs(parsed_redir_url.query)
1841                     q = qs.get('q')
1842                     if q and q[0]:
1843                         return q[0]
1844                 return redir_url
1845
1846             description_original = video_description = re.sub(r'''(?x)
1847                 <a\s+
1848                     (?:[a-zA-Z-]+="[^"]*"\s+)*?
1849                     (?:title|href)="([^"]+)"\s+
1850                     (?:[a-zA-Z-]+="[^"]*"\s+)*?
1851                     class="[^"]*"[^>]*>
1852                 [^<]+\.{3}\s*
1853                 </a>
1854             ''', replace_url, video_description)
1855             video_description = clean_html(video_description)
1856         else:
1857             video_description = self._html_search_meta('description', video_webpage) or video_details.get('shortDescription')
1858
1859         if not smuggled_data.get('force_singlefeed', False):
1860             if not self._downloader.params.get('noplaylist'):
1861                 multifeed_metadata_list = try_get(
1862                     player_response,
1863                     lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
1864                     compat_str) or try_get(
1865                     video_info, lambda x: x['multifeed_metadata_list'][0], compat_str)
1866                 if multifeed_metadata_list:
1867                     entries = []
1868                     feed_ids = []
1869                     for feed in multifeed_metadata_list.split(','):
1870                         # Unquote should take place before split on comma (,) since textual
1871                         # fields may contain comma as well (see
1872                         # https://github.com/ytdl-org/youtube-dl/issues/8536)
1873                         feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
1874
1875                         def feed_entry(name):
1876                             return try_get(feed_data, lambda x: x[name][0], compat_str)
1877
1878                         feed_id = feed_entry('id')
1879                         if not feed_id:
1880                             continue
1881                         feed_title = feed_entry('title')
1882                         title = video_title
1883                         if feed_title:
1884                             title += ' (%s)' % feed_title
1885                         entries.append({
1886                             '_type': 'url_transparent',
1887                             'ie_key': 'Youtube',
1888                             'url': smuggle_url(
1889                                 '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
1890                                 {'force_singlefeed': True}),
1891                             'title': title,
1892                         })
1893                         feed_ids.append(feed_id)
1894                     self.to_screen(
1895                         'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
1896                         % (', '.join(feed_ids), video_id))
1897                     return self.playlist_result(entries, video_id, video_title, video_description)
1898             else:
1899                 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
1900
1901         if view_count is None:
1902             view_count = extract_view_count(video_info)
1903         if view_count is None and video_details:
1904             view_count = int_or_none(video_details.get('viewCount'))
1905
1906         if is_live is None:
1907             is_live = bool_or_none(video_details.get('isLive'))
1908
1909         # Check for "rental" videos
1910         if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
1911             raise ExtractorError('"rental" videos not supported. See https://github.com/ytdl-org/youtube-dl/issues/359 for more information.', expected=True)
1912
1913         def _extract_filesize(media_url):
1914             return int_or_none(self._search_regex(
1915                 r'\bclen[=/](\d+)', media_url, 'filesize', default=None))
1916
1917         streaming_formats = try_get(player_response, lambda x: x['streamingData']['formats'], list) or []
1918         streaming_formats.extend(try_get(player_response, lambda x: x['streamingData']['adaptiveFormats'], list) or [])
1919
1920         if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1921             self.report_rtmp_download()
1922             formats = [{
1923                 'format_id': '_rtmp',
1924                 'protocol': 'rtmp',
1925                 'url': video_info['conn'][0],
1926                 'player_url': player_url,
1927             }]
1928         elif not is_live and (streaming_formats or len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1):
1929             encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
1930             if 'rtmpe%3Dyes' in encoded_url_map:
1931                 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/ytdl-org/youtube-dl/issues/343 for more information.', expected=True)
1932             formats = []
1933             formats_spec = {}
1934             fmt_list = video_info.get('fmt_list', [''])[0]
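                 # Each fmt_list entry has the form 'itag/WIDTHxHEIGHT/...'; use it to
                 # pre-fill per-itag resolution information.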
1935             if fmt_list:
1936                 for fmt in fmt_list.split(','):
1937                     spec = fmt.split('/')
1938                     if len(spec) > 1:
1939                         width_height = spec[1].split('x')
1940                         if len(width_height) == 2:
1941                             formats_spec[spec[0]] = {
1942                                 'resolution': spec[1],
1943                                 'width': int_or_none(width_height[0]),
1944                                 'height': int_or_none(width_height[1]),
1945                             }
1946             for fmt in streaming_formats:
1947                 itag = str_or_none(fmt.get('itag'))
1948                 if not itag:
1949                     continue
1950                 quality = fmt.get('quality')
1951                 quality_label = fmt.get('qualityLabel') or quality
1952                 formats_spec[itag] = {
1953                     'asr': int_or_none(fmt.get('audioSampleRate')),
1954                     'filesize': int_or_none(fmt.get('contentLength')),
1955                     'format_note': quality_label,
1956                     'fps': int_or_none(fmt.get('fps')),
1957                     'height': int_or_none(fmt.get('height')),
1958                     # bitrate for itag 43 is always 2147483647
1959                     'tbr': float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000) if itag != '43' else None,
1960                     'width': int_or_none(fmt.get('width')),
1961                 }
1962
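                 # Each streaming format either exposes a direct 'url' or a 'cipher'
                 # whose query string carries the URL plus an encrypted 's' signature
                 # that has to be decrypted and appended before the URL is usable.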
1963             for fmt in streaming_formats:
1964                 if fmt.get('drmFamilies') or fmt.get('drm_families'):
1965                     continue
1966                 url = url_or_none(fmt.get('url'))
1967
1968                 if not url:
1969                     cipher = fmt.get('cipher')
1970                     if not cipher:
1971                         continue
1972                     url_data = compat_parse_qs(cipher)
1973                     url = url_or_none(try_get(url_data, lambda x: x['url'][0], compat_str))
1974                     if not url:
1975                         continue
1976                 else:
1977                     cipher = None
1978                     url_data = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
1979
1980                 stream_type = int_or_none(try_get(url_data, lambda x: x['stream_type'][0]))
1981                 # Unsupported FORMAT_STREAM_TYPE_OTF
1982                 if stream_type == 3:
1983                     continue
1984
1985                 format_id = fmt.get('itag') or url_data['itag'][0]
1986                 if not format_id:
1987                     continue
1988                 format_id = compat_str(format_id)
1989
1990                 if cipher:
1991                     if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
1992                         ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
1993                         jsplayer_url_json = self._search_regex(
1994                             ASSETS_RE,
1995                             embed_webpage if age_gate else video_webpage,
1996                             'JS player URL (1)', default=None)
1997                         if not jsplayer_url_json and not age_gate:
1998                             # We need the embed webpage after all
1999                             if embed_webpage is None:
2000                                 embed_url = proto + '://www.youtube.com/embed/%s' % video_id
2001                                 embed_webpage = self._download_webpage(
2002                                     embed_url, video_id, 'Downloading embed webpage')
2003                             jsplayer_url_json = self._search_regex(
2004                                 ASSETS_RE, embed_webpage, 'JS player URL')
2005
2006                         player_url = json.loads(jsplayer_url_json)
2007                         if player_url is None:
2008                             player_url_json = self._search_regex(
2009                                 r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
2010                                 video_webpage, 'age gate player URL')
2011                             player_url = json.loads(player_url_json)
2012
2013                     if 'sig' in url_data:
2014                         url += '&signature=' + url_data['sig'][0]
2015                     elif 's' in url_data:
2016                         encrypted_sig = url_data['s'][0]
2017
2018                         if self._downloader.params.get('verbose'):
2019                             if player_url is None:
2020                                 player_desc = 'unknown'
2021                             else:
2022                                 player_type, player_version = self._extract_player_info(player_url)
2023                                 player_desc = '%s player %s' % ('flash' if player_type == 'swf' else 'html5', player_version)
2024                             parts_sizes = self._signature_cache_id(encrypted_sig)
2025                             self.to_screen('{%s} signature length %s, %s' %
2026                                            (format_id, parts_sizes, player_desc))
2027
2028                         signature = self._decrypt_signature(
2029                             encrypted_sig, video_id, player_url, age_gate)
2030                         sp = try_get(url_data, lambda x: x['sp'][0], compat_str) or 'signature'
2031                         url += '&%s=%s' % (sp, signature)
2032                 if 'ratebypass' not in url:
2033                     url += '&ratebypass=yes'
2034
2035                 dct = {
2036                     'format_id': format_id,
2037                     'url': url,
2038                     'player_url': player_url,
2039                 }
2040                 if format_id in self._formats:
2041                     dct.update(self._formats[format_id])
2042                 if format_id in formats_spec:
2043                     dct.update(formats_spec[format_id])
2044
2045                 # Some itags are not included in the DASH manifest, thus the
2046                 # corresponding formats lack metadata (see https://github.com/ytdl-org/youtube-dl/pull/5993).
2047                 # Try to extract the metadata from the url_encoded_fmt_stream_map entry.
2048                 mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
2049                 width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
2050
2051                 if width is None:
2052                     width = int_or_none(fmt.get('width'))
2053                 if height is None:
2054                     height = int_or_none(fmt.get('height'))
2055
2056                 filesize = int_or_none(url_data.get(
2057                     'clen', [None])[0]) or _extract_filesize(url)
2058
2059                 quality = url_data.get('quality', [None])[0] or fmt.get('quality')
2060                 quality_label = url_data.get('quality_label', [None])[0] or fmt.get('qualityLabel')
2061
2062                 tbr = (float_or_none(url_data.get('bitrate', [None])[0], 1000)
2063                        or float_or_none(fmt.get('bitrate'), 1000)) if format_id != '43' else None
2064                 fps = int_or_none(url_data.get('fps', [None])[0]) or int_or_none(fmt.get('fps'))
2065
2066                 more_fields = {
2067                     'filesize': filesize,
2068                     'tbr': tbr,
2069                     'width': width,
2070                     'height': height,
2071                     'fps': fps,
2072                     'format_note': quality_label or quality,
2073                 }
2074                 for key, value in more_fields.items():
2075                     if value:
2076                         dct[key] = value
2077                 type_ = url_data.get('type', [None])[0] or fmt.get('mimeType')
2078                 if type_:
2079                     type_split = type_.split(';')
2080                     kind_ext = type_split[0].split('/')
2081                     if len(kind_ext) == 2:
2082                         kind, _ = kind_ext
2083                         dct['ext'] = mimetype2ext(type_split[0])
2084                         if kind in ('audio', 'video'):
2085                             codecs = None
2086                             for mobj in re.finditer(
2087                                     r'(?P<key>[a-zA-Z_-]+)=(?P<quote>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):
2088                                 if mobj.group('key') == 'codecs':
2089                                     codecs = mobj.group('val')
2090                                     break
2091                             if codecs:
2092                                 dct.update(parse_codecs(codecs))
2093                 if dct.get('acodec') == 'none' or dct.get('vcodec') == 'none':
2094                     dct['downloader_options'] = {
2095                         # Youtube throttles chunks >~10M
2096                         'http_chunk_size': 10485760,
2097                     }
2098                 formats.append(dct)
2099         else:
2100             manifest_url = (
2101                 url_or_none(try_get(
2102                     player_response,
2103                     lambda x: x['streamingData']['hlsManifestUrl'],
2104                     compat_str))
2105                 or url_or_none(try_get(
2106                     video_info, lambda x: x['hlsvp'][0], compat_str)))
2107             if manifest_url:
2108                 formats = []
2109                 m3u8_formats = self._extract_m3u8_formats(
2110                     manifest_url, video_id, 'mp4', fatal=False)
2111                 for a_format in m3u8_formats:
2112                     itag = self._search_regex(
2113                         r'/itag/(\d+)/', a_format['url'], 'itag', default=None)
2114                     if itag:
2115                         a_format['format_id'] = itag
2116                         if itag in self._formats:
2117                             dct = self._formats[itag].copy()
2118                             dct.update(a_format)
2119                             a_format = dct
2120                     a_format['player_url'] = player_url
2121                     # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
2122                     a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
2123                     formats.append(a_format)
2124             else:
2125                 error_message = extract_unavailable_message()
2126                 if not error_message:
2127                     error_message = clean_html(try_get(
2128                         player_response, lambda x: x['playabilityStatus']['reason'],
2129                         compat_str))
2130                 if not error_message:
2131                     error_message = clean_html(
2132                         try_get(video_info, lambda x: x['reason'][0], compat_str))
2133                 if error_message:
2134                     raise ExtractorError(error_message, expected=True)
2135                 raise ExtractorError('no conn, hlsvp, hlsManifestUrl or url_encoded_fmt_stream_map information found in video info')
2136
2137         # uploader
2138         video_uploader = try_get(
2139             video_info, lambda x: x['author'][0],
2140             compat_str) or str_or_none(video_details.get('author'))
2141         if video_uploader:
2142             video_uploader = compat_urllib_parse_unquote_plus(video_uploader)
2143         else:
2144             self._downloader.report_warning('unable to extract uploader name')
2145
2146         # uploader_id
2147         video_uploader_id = None
2148         video_uploader_url = None
2149         mobj = re.search(
2150             r'<link itemprop="url" href="(?P<uploader_url>https?://www\.youtube\.com/(?:user|channel)/(?P<uploader_id>[^"]+))">',
2151             video_webpage)
2152         if mobj is not None:
2153             video_uploader_id = mobj.group('uploader_id')
2154             video_uploader_url = mobj.group('uploader_url')
2155         else:
2156             self._downloader.report_warning('unable to extract uploader nickname')
2157
2158         channel_id = (
2159             str_or_none(video_details.get('channelId'))
2160             or self._html_search_meta(
2161                 'channelId', video_webpage, 'channel id', default=None)
2162             or self._search_regex(
2163                 r'data-channel-external-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
2164                 video_webpage, 'channel id', default=None, group='id'))
2165         channel_url = 'http://www.youtube.com/channel/%s' % channel_id if channel_id else None
2166
2167         # thumbnail image
2168         # We try first to get a high quality image:
2169         m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
2170                             video_webpage, re.DOTALL)
2171         if m_thumb is not None:
2172             video_thumbnail = m_thumb.group(1)
2173         elif 'thumbnail_url' not in video_info:
2174             self._downloader.report_warning('unable to extract video thumbnail')
2175             video_thumbnail = None
2176         else:   # don't panic if we can't find it
2177             video_thumbnail = compat_urllib_parse_unquote_plus(video_info['thumbnail_url'][0])
2178
2179         # upload date
2180         upload_date = self._html_search_meta(
2181             'datePublished', video_webpage, 'upload date', default=None)
2182         if not upload_date:
2183             upload_date = self._search_regex(
2184                 [r'(?s)id="eow-date.*?>(.*?)</span>',
2185                  r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'],
2186                 video_webpage, 'upload date', default=None)
2187         upload_date = unified_strdate(upload_date)
2188
2189         video_license = self._html_search_regex(
2190             r'<h4[^>]+class="title"[^>]*>\s*License\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li',
2191             video_webpage, 'license', default=None)
2192
2193         m_music = re.search(
2194             r'''(?x)
2195                 <h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*
2196                 <ul[^>]*>\s*
2197                 <li>(?P<title>.+?)
2198                 by (?P<creator>.+?)
2199                 (?:
2200                     \(.+?\)|
2201                     <a[^>]*
2202                         (?:
2203                             \bhref=["\']/red[^>]*>|             # drop possible
2204                             >\s*Listen ad-free with YouTube Red # YouTube Red ad
2205                         )
2206                     .*?
2207                 )?</li
2208             ''',
2209             video_webpage)
2210         if m_music:
2211             video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
2212             video_creator = clean_html(m_music.group('creator'))
2213         else:
2214             video_alt_title = video_creator = None
2215
2216         def extract_meta(field):
2217             return self._html_search_regex(
2218                 r'<h4[^>]+class="title"[^>]*>\s*%s\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li>\s*' % field,
2219                 video_webpage, field, default=None)
2220
2221         track = extract_meta('Song')
2222         artist = extract_meta('Artist')
2223         album = extract_meta('Album')
2224
2225         # Youtube Music Auto-generated description
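             # A typical auto-generated description (illustrative sketch) looks like:
             #   Provided to YouTube by <label>
             #   <track> · <artist>
             #   <album>
             #   ℗ <release_year> <label>
             #   Released on: <YYYY-MM-DD>
             #   Artist: <artist>
             # which is what the regex below picks apart.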
2226         release_date = release_year = None
2227         if video_description:
2228             mobj = re.search(r'(?s)Provided to YouTube by [^\n]+\n+(?P<track>[^·]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?', video_description)
2229             if mobj:
2230                 if not track:
2231                     track = mobj.group('track').strip()
2232                 if not artist:
2233                     artist = mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·'))
2234                 if not album:
2235                     album = mobj.group('album').strip()
2236                 release_year = mobj.group('release_year')
2237                 release_date = mobj.group('release_date')
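                     # release_date comes as YYYY-MM-DD; normalise it to YYYYMMDD and
                     # fall back to its year when no ℗ year was matched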
2238                 if release_date:
2239                     release_date = release_date.replace('-', '')
2240                     if not release_year:
2241                         release_year = int(release_date[:4])
2242                 if release_year:
2243                     release_year = int(release_year)
2244
2245         m_episode = re.search(
2246             r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
2247             video_webpage)
2248         if m_episode:
2249             series = unescapeHTML(m_episode.group('series'))
2250             season_number = int(m_episode.group('season'))
2251             episode_number = int(m_episode.group('episode'))
2252         else:
2253             series = season_number = episode_number = None
2254
2255         m_cat_container = self._search_regex(
2256             r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
2257             video_webpage, 'categories', default=None)
2258         if m_cat_container:
2259             category = self._html_search_regex(
2260                 r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
2261                 default=None)
2262             video_categories = None if category is None else [category]
2263         else:
2264             video_categories = None
2265
2266         video_tags = [
2267             unescapeHTML(m.group('content'))
2268             for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
2269
2270         def _extract_count(count_name):
2271             return str_to_int(self._search_regex(
2272                 r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>'
2273                 % re.escape(count_name),
2274                 video_webpage, count_name, default=None))
2275
2276         like_count = _extract_count('like')
2277         dislike_count = _extract_count('dislike')
2278
2279         if view_count is None:
2280             view_count = str_to_int(self._search_regex(
2281                 r'<[^>]+class=["\']watch-view-count[^>]+>\s*([\d,\s]+)', video_webpage,
2282                 'view count', default=None))
2283
2284         average_rating = (
2285             float_or_none(video_details.get('averageRating'))
2286             or try_get(video_info, lambda x: float_or_none(x['avg_rating'][0])))
2287
2288         # subtitles
2289         video_subtitles = self.extract_subtitles(video_id, video_webpage)
2290         automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
2291
2292         video_duration = try_get(
2293             video_info, lambda x: int_or_none(x['length_seconds'][0]))
2294         if not video_duration:
2295             video_duration = int_or_none(video_details.get('lengthSeconds'))
2296         if not video_duration:
2297             video_duration = parse_duration(self._html_search_meta(
2298                 'duration', video_webpage, 'video duration'))
2299
2300         # annotations
2301         video_annotations = None
2302         if self._downloader.params.get('writeannotations', False):
2303             xsrf_token = self._search_regex(
2304                 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>[A-Za-z0-9+/=]+)\2',
2305                 video_webpage, 'xsrf token', group='xsrf_token', fatal=False)
2306             invideo_url = try_get(
2307                 player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
2308             if xsrf_token and invideo_url:
2309                 xsrf_field_name = self._search_regex(
2310                     r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
2311                     video_webpage, 'xsrf field name',
2312                     group='xsrf_field_name', default='session_token')
2313                 video_annotations = self._download_webpage(
2314                     self._proto_relative_url(invideo_url),
2315                     video_id, note='Downloading annotations',
2316                     errnote='Unable to download video annotations', fatal=False,
2317                     data=urlencode_postdata({xsrf_field_name: xsrf_token}))
2318
2319         chapters = self._extract_chapters(description_original, video_duration)
2320
2321         # Look for the DASH manifest
2322         if self._downloader.params.get('youtube_include_dash_manifest', True):
2323             dash_mpd_fatal = True
2324             for mpd_url in dash_mpds:
2325                 dash_formats = {}
2326                 try:
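                         # Some DASH manifest URLs embed an encrypted signature in the
                         # path as /s/<sig>; decrypt it and re-insert it as
                         # /signature/<sig> before downloading the manifest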
2327                     def decrypt_sig(mobj):
2328                         s = mobj.group(1)
2329                         dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
2330                         return '/signature/%s' % dec_s
2331
2332                     mpd_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, mpd_url)
2333
2334                     for df in self._extract_mpd_formats(
2335                             mpd_url, video_id, fatal=dash_mpd_fatal,
2336                             formats_dict=self._formats):
2337                         if not df.get('filesize'):
2338                             df['filesize'] = _extract_filesize(df['url'])
2339                         # Do not overwrite DASH format found in some previous DASH manifest
2340                         if df['format_id'] not in dash_formats:
2341                             dash_formats[df['format_id']] = df
2342                         # Additional DASH manifests may result in HTTP Error 403, so
2343                         # allow them to fail without a bug report message if some DASH
2344                         # manifest has already succeeded. This is a temporary workaround to
2345                         # reduce the burst of bug reports until we figure out the reason and
2346                         # whether it can be fixed at all.
2347                         dash_mpd_fatal = False
2348                 except (ExtractorError, KeyError) as e:
2349                     self.report_warning(
2350                         'Skipping DASH manifest: %r' % e, video_id)
2351                 if dash_formats:
2352                     # Remove the formats we found through non-DASH sources; they
2353                     # contain less info and can be wrong because we use
2354                     # fixed values (for example the resolution). See
2355                     # https://github.com/ytdl-org/youtube-dl/issues/5774 for an
2356                     # example.
2357                     formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
2358                     formats.extend(dash_formats.values())
2359
2360         # Check for malformed aspect ratio
2361         stretched_m = re.search(
2362             r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
2363             video_webpage)
2364         if stretched_m:
2365             w = float(stretched_m.group('w'))
2366             h = float(stretched_m.group('h'))
2367             # yt:stretch may hold invalid ratio data (e.g. for Q39EVAstoRM ratio is 17:0).
2368             # We will only process correct ratios.
2369             if w > 0 and h > 0:
2370                 ratio = w / h
2371                 for f in formats:
2372                     if f.get('vcodec') != 'none':
2373                         f['stretched_ratio'] = ratio
2374
2375         if not formats:
2376             if 'reason' in video_info:
2377                 if 'The uploader has not made this video available in your country.' in video_info['reason']:
2378                     regions_allowed = self._html_search_meta(
2379                         'regionsAllowed', video_webpage, default=None)
2380                     countries = regions_allowed.split(',') if regions_allowed else None
2381                     self.raise_geo_restricted(
2382                         msg=video_info['reason'][0], countries=countries)
2383                 reason = video_info['reason'][0]
2384                 if 'Invalid parameters' in reason:
2385                     unavailable_message = extract_unavailable_message()
2386                     if unavailable_message:
2387                         reason = unavailable_message
2388                 raise ExtractorError(
2389                     'YouTube said: %s' % reason,
2390                     expected=True, video_id=video_id)
2391             if video_info.get('license_info') or try_get(player_response, lambda x: x['streamingData']['licenseInfos']):
2392                 raise ExtractorError('This video is DRM protected.', expected=True)
2393
2394         self._sort_formats(formats)
2395
2396         self.mark_watched(video_id, video_info, player_response)
2397
2398         return {
2399             'id': video_id,
2400             'uploader': video_uploader,
2401             'uploader_id': video_uploader_id,
2402             'uploader_url': video_uploader_url,
2403             'channel_id': channel_id,
2404             'channel_url': channel_url,
2405             'upload_date': upload_date,
2406             'license': video_license,
2407             'creator': video_creator or artist,
2408             'title': video_title,
2409             'alt_title': video_alt_title or track,
2410             'thumbnail': video_thumbnail,
2411             'description': video_description,
2412             'categories': video_categories,
2413             'tags': video_tags,
2414             'subtitles': video_subtitles,
2415             'automatic_captions': automatic_captions,
2416             'duration': video_duration,
2417             'age_limit': 18 if age_gate else 0,
2418             'annotations': video_annotations,
2419             'chapters': chapters,
2420             'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
2421             'view_count': view_count,
2422             'like_count': like_count,
2423             'dislike_count': dislike_count,
2424             'average_rating': average_rating,
2425             'formats': formats,
2426             'is_live': is_live,
2427             'start_time': start_time,
2428             'end_time': end_time,
2429             'series': series,
2430             'season_number': season_number,
2431             'episode_number': episode_number,
2432             'track': track,
2433             'artist': artist,
2434             'album': album,
2435             'release_date': release_date,
2436             'release_year': release_year,
2437         }
2438
2439
2440 class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
2441     IE_DESC = 'YouTube.com playlists'
2442     _VALID_URL = r"""(?x)(?:
2443                         (?:https?://)?
2444                         (?:\w+\.)?
2445                         (?:
2446                             (?:
2447                                 youtube(?:kids)?\.com|
2448                                 invidio\.us
2449                             )
2450                             /
2451                             (?:
2452                                (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/(?:videoseries|[0-9A-Za-z_-]{11}))
2453                                \? (?:.*?[&;])*? (?:p|a|list)=
2454                             |  p/
2455                             )|
2456                             youtu\.be/[0-9A-Za-z_-]{11}\?.*?\blist=
2457                         )
2458                         (
2459                             (?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)?[0-9A-Za-z-_]{10,}
2460                             # Top tracks; they can also include dots
2461                             |(?:MC)[\w\.]*
2462                         )
2463                         .*
2464                      |
2465                         (%(playlist_id)s)
2466                      )""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
2467     _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
2468     _VIDEO_RE_TPL = r'href="\s*/watch\?v=%s(?:&amp;(?:[^"]*?index=(?P<index>\d+))?(?:[^>]+>(?P<title>[^<]+))?)?'
2469     _VIDEO_RE = _VIDEO_RE_TPL % r'(?P<id>[0-9A-Za-z_-]{11})'
2470     IE_NAME = 'youtube:playlist'
2471     _TESTS = [{
2472         'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
2473         'info_dict': {
2474             'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2475             'uploader': 'Sergey M.',
2476             'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
2477             'title': 'youtube-dl public playlist',
2478         },
2479         'playlist_count': 1,
2480     }, {
2481         'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
2482         'info_dict': {
2483             'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2484             'uploader': 'Sergey M.',
2485             'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
2486             'title': 'youtube-dl empty playlist',
2487         },
2488         'playlist_count': 0,
2489     }, {
2490         'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
2491         'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2492         'info_dict': {
2493             'title': '29C3: Not my department',
2494             'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2495             'uploader': 'Christiaan008',
2496             'uploader_id': 'ChRiStIaAn008',
2497         },
2498         'playlist_count': 96,
2499     }, {
2500         'note': 'issue #673',
2501         'url': 'PLBB231211A4F62143',
2502         'info_dict': {
2503             'title': '[OLD]Team Fortress 2 (Class-based LP)',
2504             'id': 'PLBB231211A4F62143',
2505             'uploader': 'Wickydoo',
2506             'uploader_id': 'Wickydoo',
2507         },
2508         'playlist_mincount': 26,
2509     }, {
2510         'note': 'Large playlist',
2511         'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
2512         'info_dict': {
2513             'title': 'Uploads from Cauchemar',
2514             'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
2515             'uploader': 'Cauchemar',
2516             'uploader_id': 'Cauchemar89',
2517         },
2518         'playlist_mincount': 799,
2519     }, {
2520         'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
2521         'info_dict': {
2522             'title': 'YDL_safe_search',
2523             'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
2524         },
2525         'playlist_count': 2,
2526         'skip': 'This playlist is private',
2527     }, {
2528         'note': 'embedded',
2529         'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
2530         'playlist_count': 4,
2531         'info_dict': {
2532             'title': 'JODA15',
2533             'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
2534             'uploader': 'milan',
2535             'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
2536         }
2537     }, {
2538         'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
2539         'playlist_mincount': 485,
2540         'info_dict': {
2541             'title': '2018 Chinese New Singles (11/6 updated)',
2542             'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
2543             'uploader': 'LBK',
2544             'uploader_id': 'sdragonfang',
2545         }
2546     }, {
2547         'note': 'Embedded SWF player',
2548         'url': 'https://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
2549         'playlist_count': 4,
2550         'info_dict': {
2551             'title': 'JODA7',
2552             'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ',
2553         },
2554         'skip': 'This playlist does not exist',
2555     }, {
2556         'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
2557         'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
2558         'info_dict': {
2559             'title': 'Uploads from Interstellar Movie',
2560             'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
2561             'uploader': 'Interstellar Movie',
2562             'uploader_id': 'InterstellarMovie1',
2563         },
2564         'playlist_mincount': 21,
2565     }, {
2566         # Playlist URL that does not actually serve a playlist
2567         'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
2568         'info_dict': {
2569             'id': 'FqZTN594JQw',
2570             'ext': 'webm',
2571             'title': "Smiley's People 01 detective, Adventure Series, Action",
2572             'uploader': 'STREEM',
2573             'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
2574             'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
2575             'upload_date': '20150526',
2576             'license': 'Standard YouTube License',
2577             'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
2578             'categories': ['People & Blogs'],
2579             'tags': list,
2580             'view_count': int,
2581             'like_count': int,
2582             'dislike_count': int,
2583         },
2584         'params': {
2585             'skip_download': True,
2586         },
2587         'skip': 'This video is not available.',
2588         'add_ie': [YoutubeIE.ie_key()],
2589     }, {
2590         'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
2591         'info_dict': {
2592             'id': 'yeWKywCrFtk',
2593             'ext': 'mp4',
2594             'title': 'Small Scale Baler and Braiding Rugs',
2595             'uploader': 'Backus-Page House Museum',
2596             'uploader_id': 'backuspagemuseum',
2597             'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
2598             'upload_date': '20161008',
2599             'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
2600             'categories': ['Nonprofits & Activism'],
2601             'tags': list,
2602             'like_count': int,
2603             'dislike_count': int,
2604         },
2605         'params': {
2606             'noplaylist': True,
2607             'skip_download': True,
2608         },
2609     }, {
2610         # https://github.com/ytdl-org/youtube-dl/issues/21844
2611         'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2612         'info_dict': {
2613             'title': 'Data Analysis with Dr Mike Pound',
2614             'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2615             'uploader_id': 'Computerphile',
2616             'uploader': 'Computerphile',
2617         },
2618         'playlist_mincount': 11,
2619     }, {
2620         'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
2621         'only_matching': True,
2622     }, {
2623         'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
2624         'only_matching': True,
2625     }, {
2626         # music album playlist
2627         'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
2628         'only_matching': True,
2629     }, {
2630         'url': 'https://invidio.us/playlist?list=PLDIoUOhQQPlXr63I_vwF9GD8sAKh77dWU',
2631         'only_matching': True,
2632     }, {
2633         'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
2634         'only_matching': True,
2635     }]
2636
2637     def _real_initialize(self):
2638         self._login()
2639
2640     def extract_videos_from_page(self, page):
2641         ids_in_page = []
2642         titles_in_page = []
2643
2644         for item in re.findall(
2645                 r'(<[^>]*\bdata-video-id\s*=\s*["\'][0-9A-Za-z_-]{11}[^>]+>)', page):
2646             attrs = extract_attributes(item)
2647             video_id = attrs['data-video-id']
2648             video_title = unescapeHTML(attrs.get('data-title'))
2649             if video_title:
2650                 video_title = video_title.strip()
2651             ids_in_page.append(video_id)
2652             titles_in_page.append(video_title)
2653
2654         # Fallback with old _VIDEO_RE
2655         self.extract_videos_from_page_impl(
2656             self._VIDEO_RE, page, ids_in_page, titles_in_page)
2657
2658         # Relaxed fallbacks
2659         self.extract_videos_from_page_impl(
2660             r'href="\s*/watch\?v\s*=\s*(?P<id>[0-9A-Za-z_-]{11})', page,
2661             ids_in_page, titles_in_page)
2662         self.extract_videos_from_page_impl(
2663             r'data-video-ids\s*=\s*["\'](?P<id>[0-9A-Za-z_-]{11})', page,
2664             ids_in_page, titles_in_page)
2665
2666         return zip(ids_in_page, titles_in_page)
2667
2668     def _extract_mix(self, playlist_id):
2669         # The mixes are generated from a single video;
2670         # the id of the playlist is just 'RD' + video_id
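             # (e.g. for video dQw4w9WgXcQ the mix playlist id would be RDdQw4w9WgXcQ)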
2671         ids = []
2672         last_id = playlist_id[-11:]
2673         for n in itertools.count(1):
2674             url = 'https://youtube.com/watch?v=%s&list=%s' % (last_id, playlist_id)
2675             webpage = self._download_webpage(
2676                 url, playlist_id, 'Downloading page {0} of Youtube mix'.format(n))
2677             new_ids = orderedSet(re.findall(
2678                 r'''(?xs)data-video-username=".*?".*?
2679                            href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id),
2680                 webpage))
2681             # Fetch new pages until all the videos are repeated; it seems that
2682             # there are always 51 unique videos.
2683             new_ids = [_id for _id in new_ids if _id not in ids]
2684             if not new_ids:
2685                 break
2686             ids.extend(new_ids)
2687             last_id = ids[-1]
2688
2689         url_results = self._ids_to_results(ids)
2690
2691         search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage)
2692         title_span = (
2693             search_title('playlist-title')
2694             or search_title('title long-title')
2695             or search_title('title'))
2696         title = clean_html(title_span)
2697
2698         return self.playlist_result(url_results, playlist_id, title)
2699
2700     def _extract_playlist(self, playlist_id):
2701         url = self._TEMPLATE_URL % playlist_id
2702         page = self._download_webpage(url, playlist_id)
2703
2704         # The yt-alert-message now has a tabindex attribute (see https://github.com/ytdl-org/youtube-dl/issues/11604)
2705         for match in re.findall(r'<div class="yt-alert-message"[^>]*>([^<]+)</div>', page):
2706             match = match.strip()
2707             # Check if the playlist exists or is private
2708             mobj = re.match(r'[^<]*(?:The|This) playlist (?P<reason>does not exist|is private)[^<]*', match)
2709             if mobj:
2710                 reason = mobj.group('reason')
2711                 message = 'This playlist %s' % reason
2712                 if 'private' in reason:
2713                     message += ', use --username or --netrc to access it'
2714                 message += '.'
2715                 raise ExtractorError(message, expected=True)
2716             elif re.match(r'[^<]*Invalid parameters[^<]*', match):
2717                 raise ExtractorError(
2718                     'Invalid parameters. Maybe URL is incorrect.',
2719                     expected=True)
2720             elif re.match(r'[^<]*Choose your language[^<]*', match):
2721                 continue
2722             else:
2723                 self.report_warning('Youtube gives an alert message: ' + match)
2724
2725         playlist_title = self._html_search_regex(
2726             r'(?s)<h1 class="pl-header-title[^"]*"[^>]*>\s*(.*?)\s*</h1>',
2727             page, 'title', default=None)
2728
2729         _UPLOADER_BASE = r'class=["\']pl-header-details[^>]+>\s*<li>\s*<a[^>]+\bhref='
2730         uploader = self._html_search_regex(
2731             r'%s["\']/(?:user|channel)/[^>]+>([^<]+)' % _UPLOADER_BASE,
2732             page, 'uploader', default=None)
2733         mobj = re.search(
2734             r'%s(["\'])(?P<path>/(?:user|channel)/(?P<uploader_id>.+?))\1' % _UPLOADER_BASE,
2735             page)
2736         if mobj:
2737             uploader_id = mobj.group('uploader_id')
2738             uploader_url = compat_urlparse.urljoin(url, mobj.group('path'))
2739         else:
2740             uploader_id = uploader_url = None
2741
2742         has_videos = True
2743
2744         if not playlist_title:
2745             try:
2746                 # Some playlist URLs don't actually serve a playlist (e.g.
2747                 # https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4)
2748                 next(self._entries(page, playlist_id))
2749             except StopIteration:
2750                 has_videos = False
2751
2752         playlist = self.playlist_result(
2753             self._entries(page, playlist_id), playlist_id, playlist_title)
2754         playlist.update({
2755             'uploader': uploader,
2756             'uploader_id': uploader_id,
2757             'uploader_url': uploader_url,
2758         })
2759
2760         return has_videos, playlist
2761
2762     def _check_download_just_video(self, url, playlist_id):
2763         # Check if it's a video-specific URL
2764         query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
2765         video_id = query_dict.get('v', [None])[0] or self._search_regex(
2766             r'(?:(?:^|//)youtu\.be/|youtube\.com/embed/(?!videoseries))([0-9A-Za-z_-]{11})', url,
2767             'video id', default=None)
2768         if video_id:
2769             if self._downloader.params.get('noplaylist'):
2770                 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
2771                 return video_id, self.url_result(video_id, 'Youtube', video_id=video_id)
2772             else:
2773                 self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
2774                 return video_id, None
2775         return None, None
2776
2777     def _real_extract(self, url):
2778         # Extract playlist id
2779         mobj = re.match(self._VALID_URL, url)
2780         if mobj is None:
2781             raise ExtractorError('Invalid URL: %s' % url)
2782         playlist_id = mobj.group(1) or mobj.group(2)
2783
2784         video_id, video = self._check_download_just_video(url, playlist_id)
2785         if video:
2786             return video
2787
2788         if playlist_id.startswith(('RD', 'UL', 'PU')):
2789             # Mixes require a custom extraction process
2790             return self._extract_mix(playlist_id)
2791
2792         has_videos, playlist = self._extract_playlist(playlist_id)
2793         if has_videos or not video_id:
2794             return playlist
2795
2796         # Some playlist URLs don't actually serve a playlist (see
2797         # https://github.com/ytdl-org/youtube-dl/issues/10537).
2798         # Fallback to plain video extraction if there is a video id
2799         # along with playlist id.
2800         return self.url_result(video_id, 'Youtube', video_id=video_id)
2801
2802
2803 class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
2804     IE_DESC = 'YouTube.com channels'
2805     _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie|kids)?\.com|(?:www\.)?invidio\.us)/channel/(?P<id>[0-9A-Za-z_-]+)'
2806     _TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos'
2807     _VIDEO_RE = r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?'
2808     IE_NAME = 'youtube:channel'
2809     _TESTS = [{
2810         'note': 'paginated channel',
2811         'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
2812         'playlist_mincount': 91,
2813         'info_dict': {
2814             'id': 'UUKfVa3S1e4PHvxWcwyMMg8w',
2815             'title': 'Uploads from lex will',
2816             'uploader': 'lex will',
2817             'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2818         }
2819     }, {
2820         'note': 'Age restricted channel',
2821         # from https://www.youtube.com/user/DeusExOfficial
2822         'url': 'https://www.youtube.com/channel/UCs0ifCMCm1icqRbqhUINa0w',
2823         'playlist_mincount': 64,
2824         'info_dict': {
2825             'id': 'UUs0ifCMCm1icqRbqhUINa0w',
2826             'title': 'Uploads from Deus Ex',
2827             'uploader': 'Deus Ex',
2828             'uploader_id': 'DeusExOfficial',
2829         },
2830     }, {
2831         'url': 'https://invidio.us/channel/UC23qupoDRn9YOAVzeoxjOQA',
2832         'only_matching': True,
2833     }, {
2834         'url': 'https://www.youtubekids.com/channel/UCyu8StPfZWapR6rfW_JgqcA',
2835         'only_matching': True,
2836     }]
2837
2838     @classmethod
2839     def suitable(cls, url):
2840         return (False if YoutubePlaylistsIE.suitable(url) or YoutubeLiveIE.suitable(url)
2841                 else super(YoutubeChannelIE, cls).suitable(url))
2842
2843     def _build_template_url(self, url, channel_id):
2844         return self._TEMPLATE_URL % channel_id
2845
2846     def _real_extract(self, url):
2847         channel_id = self._match_id(url)
2848
2849         url = self._build_template_url(url, channel_id)
2850
2851         # Channel-by-page listing is restricted to 35 pages of 30 items each, i.e. 1050 videos total (see #5778).
2852         # Work around this by extracting as a playlist if we managed to obtain the channel playlist URL,
2853         # otherwise fall back to channel-by-page extraction.
2854         channel_page = self._download_webpage(
2855             url + '?view=57', channel_id,
2856             'Downloading channel page', fatal=False)
2857         if channel_page is False:
2858             channel_playlist_id = False
2859         else:
2860             channel_playlist_id = self._html_search_meta(
2861                 'channelId', channel_page, 'channel id', default=None)
2862             if not channel_playlist_id:
2863                 channel_url = self._html_search_meta(
2864                     ('al:ios:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad'),
2865                     channel_page, 'channel url', default=None)
2866                 if channel_url:
2867                     channel_playlist_id = self._search_regex(
2868                         r'vnd\.youtube://user/([0-9A-Za-z_-]+)',
2869                         channel_url, 'channel id', default=None)
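             # A channel id of the form 'UCxxxx' maps to its uploads playlist 'UUxxxx',
             # which can then be handled by the regular playlist extractor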
2870         if channel_playlist_id and channel_playlist_id.startswith('UC'):
2871             playlist_id = 'UU' + channel_playlist_id[2:]
2872             return self.url_result(
2873                 compat_urlparse.urljoin(url, '/playlist?list=%s' % playlist_id), 'YoutubePlaylist')
2874
2875         channel_page = self._download_webpage(url, channel_id, 'Downloading page #1')
2876         autogenerated = re.search(r'''(?x)
2877                 class="[^"]*?(?:
2878                     channel-header-autogenerated-label|
2879                     yt-channel-title-autogenerated
2880                 )[^"]*"''', channel_page) is not None
2881
2882         if autogenerated:
2883             # The videos are contained in a single page;
2884             # the ajax pages can't be used, as they are empty
2885             entries = [
2886                 self.url_result(
2887                     video_id, 'Youtube', video_id=video_id,
2888                     video_title=video_title)
2889                 for video_id, video_title in self.extract_videos_from_page(channel_page)]
2890             return self.playlist_result(entries, channel_id)
2891
2892         try:
2893             next(self._entries(channel_page, channel_id))
2894         except StopIteration:
2895             alert_message = self._html_search_regex(
2896                 r'(?s)<div[^>]+class=(["\']).*?\byt-alert-message\b.*?\1[^>]*>(?P<alert>[^<]+)</div>',
2897                 channel_page, 'alert', default=None, group='alert')
2898             if alert_message:
2899                 raise ExtractorError('Youtube said: %s' % alert_message, expected=True)
2900
2901         return self.playlist_result(self._entries(channel_page, channel_id), channel_id)
2902
2903
2904 class YoutubeUserIE(YoutubeChannelIE):
2905     IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
2906     _VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:(?P<user>user|c)/)?(?!(?:attribution_link|watch|results|shared)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
2907     _TEMPLATE_URL = 'https://www.youtube.com/%s/%s/videos'
2908     IE_NAME = 'youtube:user'
2909
2910     _TESTS = [{
2911         'url': 'https://www.youtube.com/user/TheLinuxFoundation',
2912         'playlist_mincount': 320,
2913         'info_dict': {
2914             'id': 'UUfX55Sx5hEFjoC3cNs6mCUQ',
2915             'title': 'Uploads from The Linux Foundation',
2916             'uploader': 'The Linux Foundation',
2917             'uploader_id': 'TheLinuxFoundation',
2918         }
2919     }, {
2920         # Only available via https://www.youtube.com/c/12minuteathlete/videos
2921         # but not https://www.youtube.com/user/12minuteathlete/videos
2922         'url': 'https://www.youtube.com/c/12minuteathlete/videos',
2923         'playlist_mincount': 249,
2924         'info_dict': {
2925             'id': 'UUVjM-zV6_opMDx7WYxnjZiQ',
2926             'title': 'Uploads from 12 Minute Athlete',
2927             'uploader': '12 Minute Athlete',
2928             'uploader_id': 'the12minuteathlete',
2929         }
2930     }, {
2931         'url': 'ytuser:phihag',
2932         'only_matching': True,
2933     }, {
2934         'url': 'https://www.youtube.com/c/gametrailers',
2935         'only_matching': True,
2936     }, {
2937         'url': 'https://www.youtube.com/gametrailers',
2938         'only_matching': True,
2939     }, {
2940         # This channel is not available; it is geo-restricted to JP
2941         'url': 'https://www.youtube.com/user/kananishinoSMEJ/videos',
2942         'only_matching': True,
2943     }]
2944
2945     @classmethod
2946     def suitable(cls, url):
2947         # Don't return True if the url can be extracted with another youtube
2948         # extractor; the regex is too permissive and would match otherwise.
2949         other_yt_ies = iter(klass for (name, klass) in globals().items() if name.startswith('Youtube') and name.endswith('IE') and klass is not cls)
2950         if any(ie.suitable(url) for ie in other_yt_ies):
2951             return False
2952         else:
2953             return super(YoutubeUserIE, cls).suitable(url)
2954
2955     def _build_template_url(self, url, channel_id):
2956         mobj = re.match(self._VALID_URL, url)
2957         return self._TEMPLATE_URL % (mobj.group('user') or 'user', mobj.group('id'))
2958
2959
2960 class YoutubeLiveIE(YoutubeBaseInfoExtractor):
2961     IE_DESC = 'YouTube.com live streams'
2962     _VALID_URL = r'(?P<base_url>https?://(?:\w+\.)?youtube\.com/(?:(?:user|channel|c)/)?(?P<id>[^/]+))/live'
2963     IE_NAME = 'youtube:live'
2964
2965     _TESTS = [{
2966         'url': 'https://www.youtube.com/user/TheYoungTurks/live',
2967         'info_dict': {
2968             'id': 'a48o2S1cPoo',
2969             'ext': 'mp4',
2970             'title': 'The Young Turks - Live Main Show',
2971             'uploader': 'The Young Turks',
2972             'uploader_id': 'TheYoungTurks',
2973             'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
2974             'upload_date': '20150715',
2975             'license': 'Standard YouTube License',
2976             'description': 'md5:438179573adcdff3c97ebb1ee632b891',
2977             'categories': ['News & Politics'],
2978             'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
2979             'like_count': int,
2980             'dislike_count': int,
2981         },
2982         'params': {
2983             'skip_download': True,
2984         },
2985     }, {
2986         'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
2987         'only_matching': True,
2988     }, {
2989         'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
2990         'only_matching': True,
2991     }, {
2992         'url': 'https://www.youtube.com/TheYoungTurks/live',
2993         'only_matching': True,
2994     }]
2995
2996     def _real_extract(self, url):
2997         mobj = re.match(self._VALID_URL, url)
2998         channel_id = mobj.group('id')
2999         base_url = mobj.group('base_url')
3000         webpage = self._download_webpage(url, channel_id, fatal=False)
3001         if webpage:
3002             page_type = self._og_search_property(
3003                 'type', webpage, 'page type', default='')
3004             video_id = self._html_search_meta(
3005                 'videoId', webpage, 'video id', default=None)
3006             if page_type.startswith('video') and video_id and re.match(
3007                     r'^[0-9A-Za-z_-]{11}$', video_id):
3008                 return self.url_result(video_id, YoutubeIE.ie_key())
3009         return self.url_result(base_url)
3010
3011
3012 class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
3013     IE_DESC = 'YouTube.com user/channel playlists'
3014     _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/(?:user|channel)/(?P<id>[^/]+)/playlists'
3015     IE_NAME = 'youtube:playlists'
3016
3017     _TESTS = [{
3018         'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
3019         'playlist_mincount': 4,
3020         'info_dict': {
3021             'id': 'ThirstForScience',
3022             'title': 'ThirstForScience',
3023         },
3024     }, {
3025         # with "Load more" button
3026         'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
3027         'playlist_mincount': 70,
3028         'info_dict': {
3029             'id': 'igorkle1',
3030             'title': 'Игорь Клейнер',
3031         },
3032     }, {
3033         'url': 'https://www.youtube.com/channel/UCiU1dHvZObB2iP6xkJ__Icw/playlists',
3034         'playlist_mincount': 17,
3035         'info_dict': {
3036             'id': 'UCiU1dHvZObB2iP6xkJ__Icw',
3037             'title': 'Chem Player',
3038         },
3039         'skip': 'Blocked',
3040     }]
3041
3042
3043 class YoutubeSearchBaseInfoExtractor(YoutubePlaylistBaseInfoExtractor):
3044     _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})(?:[^"]*"[^>]+\btitle="(?P<title>[^"]+))?'
3045
3046
3047 class YoutubeSearchIE(SearchInfoExtractor, YoutubeSearchBaseInfoExtractor):
3048     IE_DESC = 'YouTube.com searches'
3049     # There doesn't appear to be a real limit; for example, if you search for
3050     # 'python' you get more than 8,000,000 results
3051     _MAX_RESULTS = float('inf')
3052     IE_NAME = 'youtube:search'
3053     _SEARCH_KEY = 'ytsearch'
3054     _EXTRA_QUERY_ARGS = {}
3055     _TESTS = []
3056
3057     def _get_n_results(self, query, n):
3058         """Get a specified number of results for a query"""
3059
3060         videos = []
3061         limit = n
3062
3063         url_query = {
3064             'search_query': query.encode('utf-8'),
3065         }
3066         url_query.update(self._EXTRA_QUERY_ARGS)
3067         result_url = 'https://www.youtube.com/results?' + compat_urllib_parse_urlencode(url_query)
3068
3069         for pagenum in itertools.count(1):
3070             data = self._download_json(
3071                 result_url, video_id='query "%s"' % query,
3072                 note='Downloading page %s' % pagenum,
3073                 errnote='Unable to download API page',
3074                 query={'spf': 'navigate'})
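                 # With spf=navigate the endpoint returns a JSON array; the second
                 # element carries the rendered results page HTML under body.content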
3075             html_content = data[1]['body']['content']
3076
3077             if 'class="search-message' in html_content:
3078                 raise ExtractorError(
3079                     '[youtube] No video results', expected=True)
3080
3081             new_videos = list(self._process_page(html_content))
3082             videos += new_videos
3083             if not new_videos or len(videos) > limit:
3084                 break
3085             next_link = self._html_search_regex(
3086                 r'href="(/results\?[^"]*\bsp=[^"]+)"[^>]*>\s*<span[^>]+class="[^"]*\byt-uix-button-content\b[^"]*"[^>]*>Next',
3087                 html_content, 'next link', default=None)
3088             if next_link is None:
3089                 break
3090             result_url = compat_urlparse.urljoin('https://www.youtube.com/', next_link)
3091
3092         if len(videos) > n:
3093             videos = videos[:n]
3094         return self.playlist_result(videos, query)
3095
3096
3097 class YoutubeSearchDateIE(YoutubeSearchIE):
3098     IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
3099     _SEARCH_KEY = 'ytsearchdate'
3100     IE_DESC = 'YouTube.com searches, newest videos first'
3101     _EXTRA_QUERY_ARGS = {'search_sort': 'video_date_uploaded'}
3102
3103
3104 class YoutubeSearchURLIE(YoutubeSearchBaseInfoExtractor):
3105     IE_DESC = 'YouTube.com search URLs'
3106     IE_NAME = 'youtube:search_url'
3107     _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)'
3108     _TESTS = [{
3109         'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
3110         'playlist_mincount': 5,
3111         'info_dict': {
3112             'title': 'youtube-dl test video',
3113         }
3114     }, {
3115         'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
3116         'only_matching': True,
3117     }]
3118
3119     def _real_extract(self, url):
3120         mobj = re.match(self._VALID_URL, url)
3121         query = compat_urllib_parse_unquote_plus(mobj.group('query'))
3122         webpage = self._download_webpage(url, query)
3123         return self.playlist_result(self._process_page(webpage), playlist_title=query)
3124
3125
3126 class YoutubeShowIE(YoutubePlaylistsBaseInfoExtractor):
3127     IE_DESC = 'YouTube.com (multi-season) shows'
3128     _VALID_URL = r'https?://(?:www\.)?youtube\.com/show/(?P<id>[^?#]*)'
3129     IE_NAME = 'youtube:show'
3130     _TESTS = [{
3131         'url': 'https://www.youtube.com/show/airdisasters',
3132         'playlist_mincount': 5,
3133         'info_dict': {
3134             'id': 'airdisasters',
3135             'title': 'Air Disasters',
3136         }
3137     }]
3138
3139     def _real_extract(self, url):
3140         playlist_id = self._match_id(url)
3141         return super(YoutubeShowIE, self)._real_extract(
3142             'https://www.youtube.com/show/%s/playlists' % playlist_id)
3143
3144
3145 class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
3146     """
3147     Base class for feed extractors
3148     Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
3149     """
3150     _LOGIN_REQUIRED = True
3151
3152     @property
3153     def IE_NAME(self):
3154         return 'youtube:%s' % self._FEED_NAME
3155
3156     def _real_initialize(self):
3157         self._login()
3158
3159     def _entries(self, page):
3160         # The extraction process is the same as for playlists, but the regex
3161         # for the video ids doesn't contain an index
3162         ids = []
3163         more_widget_html = content_html = page
3164         for page_num in itertools.count(1):
3165             matches = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)
3166
3167             # The 'recommended' feed has an infinite 'load more' and each new portion serves
3168             # the same videos in a (sometimes) slightly different order, so we'll check
3169             # for uniqueness and break when a portion has no new videos
3170             new_ids = list(filter(lambda video_id: video_id not in ids, orderedSet(matches)))
3171             if not new_ids:
3172                 break
3173
3174             ids.extend(new_ids)
3175
3176             for entry in self._ids_to_results(new_ids):
3177                 yield entry
3178
3179             mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
3180             if not mobj:
3181                 break
3182
3183             more = self._download_json(
3184                 'https://youtube.com/%s' % mobj.group('more'), self._PLAYLIST_TITLE,
3185                 'Downloading page #%s' % page_num,
3186                 transform_source=uppercase_escape)
3187             content_html = more['content_html']
3188             more_widget_html = more['load_more_widget_html']
3189
3190     def _real_extract(self, url):
3191         page = self._download_webpage(
3192             'https://www.youtube.com/feed/%s' % self._FEED_NAME,
3193             self._PLAYLIST_TITLE)
3194         return self.playlist_result(
3195             self._entries(page), playlist_title=self._PLAYLIST_TITLE)
3196
3197
3198 class YoutubeWatchLaterIE(YoutubePlaylistIE):
3199     IE_NAME = 'youtube:watchlater'
3200     IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
3201     _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:feed/watch_later|(?:playlist|watch)\?(?:.+&)?list=WL)|:ytwatchlater'
3202
3203     _TESTS = [{
3204         'url': 'https://www.youtube.com/playlist?list=WL',
3205         'only_matching': True,
3206     }, {
3207         'url': 'https://www.youtube.com/watch?v=bCNU9TrbiRk&index=1&list=WL',
3208         'only_matching': True,
3209     }]
3210
3211     def _real_extract(self, url):
3212         _, video = self._check_download_just_video(url, 'WL')
3213         if video:
3214             return video
3215         _, playlist = self._extract_playlist('WL')
3216         return playlist
3217
3218
3219 class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
3220     IE_NAME = 'youtube:favorites'
3221     IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
3222     _VALID_URL = r'https?://(?:www\.)?youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
3223     _LOGIN_REQUIRED = True
3224
3225     def _real_extract(self, url):
3226         webpage = self._download_webpage('https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
3227         playlist_id = self._search_regex(r'list=(.+?)["&]', webpage, 'favourites playlist id')
3228         return self.url_result(playlist_id, 'YoutubePlaylist')
3229
3230
3231 class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
3232     IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
3233     _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/recommended|:ytrec(?:ommended)?'
3234     _FEED_NAME = 'recommended'
3235     _PLAYLIST_TITLE = 'Youtube Recommended videos'
3236
3237
3238 class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
3239     IE_DESC = 'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
3240     _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
3241     _FEED_NAME = 'subscriptions'
3242     _PLAYLIST_TITLE = 'Youtube Subscriptions'
3243
3244
3245 class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
3246     IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
3247     _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/history|:ythistory'
3248     _FEED_NAME = 'history'
3249     _PLAYLIST_TITLE = 'Youtube History'
3250
3251
3252 class YoutubeTruncatedURLIE(InfoExtractor):
3253     IE_NAME = 'youtube:truncated_url'
3254     IE_DESC = False  # Do not list
3255     _VALID_URL = r'''(?x)
3256         (?:https?://)?
3257         (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
3258         (?:watch\?(?:
3259             feature=[a-z_]+|
3260             annotation_id=annotation_[^&]+|
3261             x-yt-cl=[0-9]+|
3262             hl=[^&]*|
3263             t=[0-9]+
3264         )?
3265         |
3266             attribution_link\?a=[^&]+
3267         )
3268         $
3269     '''
3270
3271     _TESTS = [{
3272         'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
3273         'only_matching': True,
3274     }, {
3275         'url': 'https://www.youtube.com/watch?',
3276         'only_matching': True,
3277     }, {
3278         'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
3279         'only_matching': True,
3280     }, {
3281         'url': 'https://www.youtube.com/watch?feature=foo',
3282         'only_matching': True,
3283     }, {
3284         'url': 'https://www.youtube.com/watch?hl=en-GB',
3285         'only_matching': True,
3286     }, {
3287         'url': 'https://www.youtube.com/watch?t=2372',
3288         'only_matching': True,
3289     }]
3290
3291     def _real_extract(self, url):
3292         raise ExtractorError(
3293             'Did you forget to quote the URL? Remember that & is a meta '
3294             'character in most shells, so you want to put the URL in quotes, '
3295             'like  youtube-dl '
3296             '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
3297             ' or simply  youtube-dl BaW_jenozKc  .',
3298             expected=True)
3299
3300
3301 class YoutubeTruncatedIDIE(InfoExtractor):
3302     IE_NAME = 'youtube:truncated_id'
3303     IE_DESC = False  # Do not list
3304     _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'
3305
3306     _TESTS = [{
3307         'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
3308         'only_matching': True,
3309     }]
3310
3311     def _real_extract(self, url):
3312         video_id = self._match_id(url)
3313         raise ExtractorError(
3314             'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url),
3315             expected=True)