youtube-dl: youtube_dl/extractor/youtube.py (at commit 2cf79e74d8dbb9cab33f5401b3c8bb95051af6d2)
1 # coding: utf-8
2
3 from __future__ import unicode_literals
4
5
6 import itertools
7 import json
8 import os.path
9 import random
10 import re
11 import time
12 import traceback
13
14 from .common import InfoExtractor, SearchInfoExtractor
15 from ..jsinterp import JSInterpreter
16 from ..swfinterp import SWFInterpreter
17 from ..compat import (
18     compat_chr,
19     compat_HTTPError,
20     compat_kwargs,
21     compat_parse_qs,
22     compat_urllib_parse_unquote,
23     compat_urllib_parse_unquote_plus,
24     compat_urllib_parse_urlencode,
25     compat_urllib_parse_urlparse,
26     compat_urlparse,
27     compat_str,
28 )
29 from ..utils import (
30     bool_or_none,
31     clean_html,
32     error_to_compat_str,
33     extract_attributes,
34     ExtractorError,
35     float_or_none,
36     get_element_by_attribute,
37     get_element_by_id,
38     int_or_none,
39     mimetype2ext,
40     orderedSet,
41     parse_codecs,
42     parse_duration,
43     remove_quotes,
44     remove_start,
45     smuggle_url,
46     str_or_none,
47     str_to_int,
48     try_get,
49     unescapeHTML,
50     unified_strdate,
51     unsmuggle_url,
52     uppercase_escape,
53     url_or_none,
54     urlencode_postdata,
55 )
56
57
58 class YoutubeBaseInfoExtractor(InfoExtractor):
59     """Provide base functions for YouTube extractors"""
60     _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
61     _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'
62
63     _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
64     _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
65     _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'
66
67     _NETRC_MACHINE = 'youtube'
68     # If True, an error is raised when no login info is provided
69     _LOGIN_REQUIRED = False
70
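    # Known playlist ID prefixes include PL (regular playlists), UU (channel
    # uploads), LL (liked videos), FL (favourites), RD (mixes/radio) and
    # OLAK5uy_ (auto-generated album playlists)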
71     _PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}'
72
73     def _set_language(self):
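        # Force the English interface via the PREF cookie (hl=en) so that
        # pages are served in English for the regex-based extraction below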
74         self._set_cookie(
75             '.youtube.com', 'PREF', 'f1=50000000&hl=en',
76             # YouTube sets the expire time to about two months
77             expire_time=time.time() + 2 * 30 * 24 * 3600)
78
79     def _ids_to_results(self, ids):
80         return [
81             self.url_result(vid_id, 'Youtube', video_id=vid_id)
82             for vid_id in ids]
83
84     def _login(self):
85         """
86         Attempt to log in to YouTube.
87         True is returned if successful or skipped.
88         False is returned if login failed.
89
90         If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
91         """
92         username, password = self._get_login_info()
93         # No authentication to be performed
94         if username is None:
95             if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
96                 raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
97             return True
98
99         login_page = self._download_webpage(
100             self._LOGIN_URL, None,
101             note='Downloading login page',
102             errnote='unable to fetch login page', fatal=False)
103         if login_page is False:
104             return False
105
106         login_form = self._hidden_inputs(login_page)
107
108         def req(url, f_req, note, errnote):
109             data = login_form.copy()
110             data.update({
111                 'pstMsg': 1,
112                 'checkConnection': 'youtube',
113                 'checkedDomains': 'youtube',
114                 'hl': 'en',
115                 'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
116                 'f.req': json.dumps(f_req),
117                 'flowName': 'GlifWebSignIn',
118                 'flowEntry': 'ServiceLogin',
119                 # TODO: reverse-engineer the actual botguard identifier generation algorithm
120                 'bgRequest': '["identifier",""]',
121             })
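            # The response is JSON prefixed with an anti-XSSI guard;
            # transform_source strips everything before the first '['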
122             return self._download_json(
123                 url, None, note=note, errnote=errnote,
124                 transform_source=lambda s: re.sub(r'^[^[]*', '', s),
125                 fatal=False,
126                 data=urlencode_postdata(data), headers={
127                     'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
128                     'Google-Accounts-XSRF': 1,
129                 })
130
131         def warn(message):
132             self._downloader.report_warning(message)
133
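        # The web sign-in flow is two requests: a "lookup" call that resolves
        # the account identifier to an opaque user hash, followed by a
        # "challenge" call that submits the password (and a TFA code if needed)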
134         lookup_req = [
135             username,
136             None, [], None, 'US', None, None, 2, False, True,
137             [
138                 None, None,
139                 [2, 1, None, 1,
140                  'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
141                  None, [], 4],
142                 1, [None, None, []], None, None, None, True
143             ],
144             username,
145         ]
146
147         lookup_results = req(
148             self._LOOKUP_URL, lookup_req,
149             'Looking up account info', 'Unable to look up account info')
150
151         if lookup_results is False:
152             return False
153
154         user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
155         if not user_hash:
156             warn('Unable to extract user hash')
157             return False
158
159         challenge_req = [
160             user_hash,
161             None, 1, None, [1, None, None, None, [password, None, True]],
162             [
163                 None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
164                 1, [None, None, []], None, None, None, True
165             ]]
166
167         challenge_results = req(
168             self._CHALLENGE_URL, challenge_req,
169             'Logging in', 'Unable to log in')
170
171         if challenge_results is False:
172             return False
173
174         login_res = try_get(challenge_results, lambda x: x[0][5], list)
175         if login_res:
176             login_msg = try_get(login_res, lambda x: x[5], compat_str)
177             warn(
178                 'Unable to log in: %s' % ('Invalid password'
179                 if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg))
180             return False
181
182         res = try_get(challenge_results, lambda x: x[0][-1], list)
183         if not res:
184             warn('Unable to extract result entry')
185             return False
186
187         login_challenge = try_get(res, lambda x: x[0][0], list)
188         if login_challenge:
189             challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
190             if challenge_str == 'TWO_STEP_VERIFICATION':
191                 # SEND_SUCCESS - TFA code has been successfully sent to phone
192                 # QUOTA_EXCEEDED - reached the limit of TFA codes
193                 status = try_get(login_challenge, lambda x: x[5], compat_str)
194                 if status == 'QUOTA_EXCEEDED':
195                     warn('Exceeded the limit of TFA codes, try later')
196                     return False
197
198                 tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
199                 if not tl:
200                     warn('Unable to extract TL')
201                     return False
202
203                 tfa_code = self._get_tfa_info('2-step verification code')
204
205                 if not tfa_code:
206                     warn(
207                         'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
208                         ' (Note that only TOTP (Google Authenticator App) codes work at this time.)')
209                     return False
210
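                # Codes sent by Google may carry a "G-" prefix; strip it
                # before submitting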
211                 tfa_code = remove_start(tfa_code, 'G-')
212
213                 tfa_req = [
214                     user_hash, None, 2, None,
215                     [
216                         9, None, None, None, None, None, None, None,
217                         [None, tfa_code, True, 2]
218                     ]]
219
220                 tfa_results = req(
221                     self._TFA_URL.format(tl), tfa_req,
222                     'Submitting TFA code', 'Unable to submit TFA code')
223
224                 if tfa_results is False:
225                     return False
226
227                 tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
228                 if tfa_res:
229                     tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
230                     warn(
231                         'Unable to finish TFA: %s' % ('Invalid TFA code'
232                         if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg))
233                     return False
234
235                 check_cookie_url = try_get(
236                     tfa_results, lambda x: x[0][-1][2], compat_str)
237             else:
238                 CHALLENGES = {
239                     'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
240                     'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
241                     'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
242                 }
243                 challenge = CHALLENGES.get(
244                     challenge_str,
245                     '%s returned error %s.' % (self.IE_NAME, challenge_str))
246                 warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
247                 return False
248         else:
249             check_cookie_url = try_get(res, lambda x: x[2], compat_str)
250
251         if not check_cookie_url:
252             warn('Unable to extract CheckCookie URL')
253             return False
254
255         check_cookie_results = self._download_webpage(
256             check_cookie_url, None, 'Checking cookie', fatal=False)
257
258         if check_cookie_results is False:
259             return False
260
261         if 'https://myaccount.google.com/' not in check_cookie_results:
262             warn('Unable to log in')
263             return False
264
265         return True
266
267     def _download_webpage_handle(self, *args, **kwargs):
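        # Request the legacy (non-Polymer) page layout, which the HTML
        # scraping in these extractors expects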
268         query = kwargs.get('query', {}).copy()
269         query['disable_polymer'] = 'true'
270         kwargs['query'] = query
271         return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
272             *args, **compat_kwargs(kwargs))
273
274     def _real_initialize(self):
275         if self._downloader is None:
276             return
277         self._set_language()
278         if not self._login():
279             return
280
281
282 class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
283     # Extract entries from a page that is paginated with a "Load more" button
284     def _entries(self, page, playlist_id):
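        # Paginate by repeatedly following the AJAX URL advertised by the
        # "Load more" button (data-uix-load-more-href) until it disappears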
285         more_widget_html = content_html = page
286         for page_num in itertools.count(1):
287             for entry in self._process_page(content_html):
288                 yield entry
289
290             mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
291             if not mobj:
292                 break
293
294             count = 0
295             retries = 3
296             while count <= retries:
297                 try:
298                     # Downloading a page may result in an intermittent 5xx HTTP error
299                     # that is usually worked around by retrying
300                     more = self._download_json(
301                         'https://youtube.com/%s' % mobj.group('more'), playlist_id,
302                         'Downloading page #%s%s'
303                         % (page_num, ' (retry #%d)' % count if count else ''),
304                         transform_source=uppercase_escape)
305                     break
306                 except ExtractorError as e:
307                     if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503):
308                         count += 1
309                         if count <= retries:
310                             continue
311                     raise
312
313             content_html = more['content_html']
314             if not content_html.strip():
315                 # Some webpages show a "Load more" button but they don't
316                 # have more videos
317                 break
318             more_widget_html = more['load_more_widget_html']
319
320
321 class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
322     def _process_page(self, content):
323         for video_id, video_title in self.extract_videos_from_page(content):
324             yield self.url_result(video_id, 'Youtube', video_id, video_title)
325
326     def extract_videos_from_page_impl(self, video_re, page, ids_in_page, titles_in_page):
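        # Accumulate (id, title) pairs in page order, de-duplicating IDs and
        # filling in a missing title when a later match for the same ID has one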
327         for mobj in re.finditer(video_re, page):
328             # The link with index 0 is not the first video of the playlist (not sure if this is still the case)
329             if 'index' in mobj.groupdict() and mobj.group('id') == '0':
330                 continue
331             video_id = mobj.group('id')
332             video_title = unescapeHTML(
333                 mobj.group('title')) if 'title' in mobj.groupdict() else None
334             if video_title:
335                 video_title = video_title.strip()
336             if video_title == '► Play all':
337                 video_title = None
338             try:
339                 idx = ids_in_page.index(video_id)
340                 if video_title and not titles_in_page[idx]:
341                     titles_in_page[idx] = video_title
342             except ValueError:
343                 ids_in_page.append(video_id)
344                 titles_in_page.append(video_title)
345
346     def extract_videos_from_page(self, page):
347         ids_in_page = []
348         titles_in_page = []
349         self.extract_videos_from_page_impl(
350             self._VIDEO_RE, page, ids_in_page, titles_in_page)
351         return zip(ids_in_page, titles_in_page)
352
353
354 class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
355     def _process_page(self, content):
356         for playlist_id in orderedSet(re.findall(
357                 r'<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*><a[^>]+href="/?playlist\?list=([0-9A-Za-z-_]{10,})"',
358                 content)):
359             yield self.url_result(
360                 'https://www.youtube.com/playlist?list=%s' % playlist_id, 'YoutubePlaylist')
361
362     def _real_extract(self, url):
363         playlist_id = self._match_id(url)
364         webpage = self._download_webpage(url, playlist_id)
365         title = self._og_search_title(webpage, fatal=False)
366         return self.playlist_result(self._entries(webpage, playlist_id), playlist_id, title)
367
368
369 class YoutubeIE(YoutubeBaseInfoExtractor):
370     IE_DESC = 'YouTube.com'
371     _VALID_URL = r"""(?x)^
372                      (
373                          (?:https?://|//)                                    # http(s):// or protocol-independent URL
374                          (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com/|
375                             (?:www\.)?deturl\.com/www\.youtube\.com/|
376                             (?:www\.)?pwnyoutube\.com/|
377                             (?:www\.)?hooktube\.com/|
378                             (?:www\.)?yourepeat\.com/|
379                             tube\.majestyc\.net/|
380                             # Invidious instances taken from https://github.com/omarroth/invidious/wiki/Invidious-Instances
381                             (?:(?:www|dev)\.)?invidio\.us/|
382                             (?:(?:www|no)\.)?invidiou\.sh/|
383                             (?:(?:www|fi|de)\.)?invidious\.snopyta\.org/|
384                             (?:www\.)?invidious\.kabi\.tk/|
385                             (?:www\.)?invidious\.13ad\.de/|
386                             (?:www\.)?invidious\.mastodon\.host/|
387                             (?:www\.)?invidious\.nixnet\.xyz/|
388                             (?:www\.)?invidious\.drycat\.fr/|
389                             (?:www\.)?tube\.poal\.co/|
390                             (?:www\.)?vid\.wxzm\.sx/|
391                             (?:www\.)?yewtu\.be/|
392                             (?:www\.)?yt\.elukerio\.org/|
393                             (?:www\.)?yt\.lelux\.fi/|
394                             (?:www\.)?kgg2m7yk5aybusll\.onion/|
395                             (?:www\.)?qklhadlycap4cnod\.onion/|
396                             (?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion/|
397                             (?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion/|
398                             (?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion/|
399                             (?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion/|
400                             (?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p/|
401                             youtube\.googleapis\.com/)                        # the various hostnames, with wildcard subdomains
402                          (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
403                          (?:                                                  # the various things that can precede the ID:
404                              (?:(?:v|embed|e)/(?!videoseries))                # v/ or embed/ or e/
405                              |(?:                                             # or the v= param in all its forms
406                                  (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)?  # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
407                                  (?:\?|\#!?)                                  # the params delimiter ? or # or #!
408                                  (?:.*?[&;])??                                # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
409                                  v=
410                              )
411                          ))
412                          |(?:
413                             youtu\.be|                                        # just youtu.be/xxxx
414                             vid\.plus|                                        # or vid.plus/xxxx
415                             zwearz\.com/watch|                                # or zwearz.com/watch/xxxx
416                          )/
417                          |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
418                          )
419                      )?                                                       # all until now is optional -> you can pass the naked ID
420                      ([0-9A-Za-z_-]{11})                                      # here it is: the YouTube video ID
421                      (?!.*?\blist=
422                         (?:
423                             %(playlist_id)s|                                  # combined list/video URLs are handled by the playlist IE
424                             WL                                                # WL are handled by the watch later IE
425                         )
426                      )
427                      (?(1).+)?                                                # if we found the ID, everything can follow
428                      $""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
429     _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
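    # Patterns extracting a player build identifier from the player JavaScript
    # URL; the identifier is used to cache the extracted signature code per
    # player version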
430     _PLAYER_INFO_RE = (
431         r'/(?P<id>[a-zA-Z0-9_-]{8,})/player_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?/base\.(?P<ext>[a-z]+)$',
432         r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.(?P<ext>[a-z]+)$',
433     )
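    # Static metadata for known itags (container, resolution, codecs, nominal
    # bitrates), used to supplement whatever the stream metadata itself reports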
434     _formats = {
435         '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
436         '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
437         '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
438         '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
439         '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
440         '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
441         '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
442         '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
443         # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
444         '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
445         '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
446         '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
447         '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
448         '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
449         '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
450         '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
451         '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
452         '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
453
454
455         # 3D videos
456         '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
457         '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
458         '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
459         '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
460         '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
461         '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
462         '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
463
464         # Apple HTTP Live Streaming
465         '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
466         '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
467         '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
468         '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
469         '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
470         '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
471         '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
472         '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
473
474         # DASH mp4 video
475         '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
476         '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
477         '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
478         '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
479         '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
480         '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'},  # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
481         '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
482         '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
483         '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
484         '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
485         '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
486         '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
487
488         # Dash mp4 audio
489         '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
490         '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
491         '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
492         '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
493         '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
494         '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
495         '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
496
497         # Dash webm
498         '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
499         '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
500         '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
501         '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
502         '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
503         '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
504         '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
505         '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
506         '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
507         '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
508         '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
509         '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
510         '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
511         '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
512         '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
513         # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
514         '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
515         '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
516         '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
517         '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
518         '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
519         '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
520
521         # Dash webm audio
522         '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
523         '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
524
525         # Dash webm audio with opus inside
526         '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
527         '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
528         '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
529
530         # RTMP (unnamed)
531         '_rtmp': {'protocol': 'rtmp'},
532
533         # av01 video only formats sometimes served with "unknown" codecs
534         '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
535         '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
536         '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
537         '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
538     }
539     _SUBTITLE_FORMATS = ('srv1', 'srv2', 'srv3', 'ttml', 'vtt')
540
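    # Disable the generic X-Forwarded-For geo-bypass mechanism for this extractor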
541     _GEO_BYPASS = False
542
543     IE_NAME = 'youtube'
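    # Download tests; entries marked 'only_matching' only verify that the URL
    # is matched by _VALID_URL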
544     _TESTS = [
545         {
546             'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
547             'info_dict': {
548                 'id': 'BaW_jenozKc',
549                 'ext': 'mp4',
550                 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
551                 'uploader': 'Philipp Hagemeister',
552                 'uploader_id': 'phihag',
553                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
554                 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
555                 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
556                 'upload_date': '20121002',
557                 'description': 'test chars:  "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
558                 'categories': ['Science & Technology'],
559                 'tags': ['youtube-dl'],
560                 'duration': 10,
561                 'view_count': int,
562                 'like_count': int,
563                 'dislike_count': int,
564                 'start_time': 1,
565                 'end_time': 9,
566             }
567         },
568         {
569             'url': 'https://www.youtube.com/watch?v=UxxajLWwzqY',
570             'note': 'Test generic use_cipher_signature video (#897)',
571             'info_dict': {
572                 'id': 'UxxajLWwzqY',
573                 'ext': 'mp4',
574                 'upload_date': '20120506',
575                 'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
576                 'alt_title': 'I Love It (feat. Charli XCX)',
577                 'description': 'md5:19a2f98d9032b9311e686ed039564f63',
578                 'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
579                          'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
580                          'iconic ep', 'iconic', 'love', 'it'],
581                 'duration': 180,
582                 'uploader': 'Icona Pop',
583                 'uploader_id': 'IconaPop',
584                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IconaPop',
585                 'creator': 'Icona Pop',
586                 'track': 'I Love It (feat. Charli XCX)',
587                 'artist': 'Icona Pop',
588             }
589         },
590         {
591             'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ',
592             'note': 'Test VEVO video with age protection (#956)',
593             'info_dict': {
594                 'id': '07FYdnEawAQ',
595                 'ext': 'mp4',
596                 'upload_date': '20130703',
597                 'title': 'Justin Timberlake - Tunnel Vision (Official Music Video) (Explicit)',
598                 'alt_title': 'Tunnel Vision',
599                 'description': 'md5:07dab3356cde4199048e4c7cd93471e1',
600                 'duration': 419,
601                 'uploader': 'justintimberlakeVEVO',
602                 'uploader_id': 'justintimberlakeVEVO',
603                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/justintimberlakeVEVO',
604                 'creator': 'Justin Timberlake',
605                 'track': 'Tunnel Vision',
606                 'artist': 'Justin Timberlake',
607                 'age_limit': 18,
608             }
609         },
610         {
611             'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
612             'note': 'Embed-only video (#1746)',
613             'info_dict': {
614                 'id': 'yZIXLfi8CZQ',
615                 'ext': 'mp4',
616                 'upload_date': '20120608',
617                 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
618                 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
619                 'uploader': 'SET India',
620                 'uploader_id': 'setindia',
621                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
622                 'age_limit': 18,
623             }
624         },
625         {
626             'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=UxxajLWwzqY',
627             'note': 'Use the first video ID in the URL',
628             'info_dict': {
629                 'id': 'BaW_jenozKc',
630                 'ext': 'mp4',
631                 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
632                 'uploader': 'Philipp Hagemeister',
633                 'uploader_id': 'phihag',
634                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
635                 'upload_date': '20121002',
636                 'description': 'test chars:  "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
637                 'categories': ['Science & Technology'],
638                 'tags': ['youtube-dl'],
639                 'duration': 10,
640                 'view_count': int,
641                 'like_count': int,
642                 'dislike_count': int,
643             },
644             'params': {
645                 'skip_download': True,
646             },
647         },
648         {
649             'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
650             'note': '256k DASH audio (format 141) via DASH manifest',
651             'info_dict': {
652                 'id': 'a9LDPn-MO4I',
653                 'ext': 'm4a',
654                 'upload_date': '20121002',
655                 'uploader_id': '8KVIDEO',
656                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
657                 'description': '',
658                 'uploader': '8KVIDEO',
659                 'title': 'UHDTV TEST 8K VIDEO.mp4'
660             },
661             'params': {
662                 'youtube_include_dash_manifest': True,
663                 'format': '141',
664             },
665             'skip': 'format 141 not served anymore',
666         },
667         # DASH manifest with encrypted signature
668         {
669             'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
670             'info_dict': {
671                 'id': 'IB3lcPjvWLA',
672                 'ext': 'm4a',
673                 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
674                 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
675                 'duration': 244,
676                 'uploader': 'AfrojackVEVO',
677                 'uploader_id': 'AfrojackVEVO',
678                 'upload_date': '20131011',
679             },
680             'params': {
681                 'youtube_include_dash_manifest': True,
682                 'format': '141/bestaudio[ext=m4a]',
683             },
684         },
685         # JS player signature function name containing $
686         {
687             'url': 'https://www.youtube.com/watch?v=nfWlot6h_JM',
688             'info_dict': {
689                 'id': 'nfWlot6h_JM',
690                 'ext': 'm4a',
691                 'title': 'Taylor Swift - Shake It Off',
692                 'description': 'md5:307195cd21ff7fa352270fe884570ef0',
693                 'duration': 242,
694                 'uploader': 'TaylorSwiftVEVO',
695                 'uploader_id': 'TaylorSwiftVEVO',
696                 'upload_date': '20140818',
697             },
698             'params': {
699                 'youtube_include_dash_manifest': True,
700                 'format': '141/bestaudio[ext=m4a]',
701             },
702         },
703         # Controversy video
704         {
705             'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
706             'info_dict': {
707                 'id': 'T4XJQO3qol8',
708                 'ext': 'mp4',
709                 'duration': 219,
710                 'upload_date': '20100909',
711                 'uploader': 'Amazing Atheist',
712                 'uploader_id': 'TheAmazingAtheist',
713                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
714                 'title': 'Burning Everyone\'s Koran',
715                 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
716             }
717         },
718         # Normal age-gate video (No vevo, embed allowed)
719         {
720             'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
721             'info_dict': {
722                 'id': 'HtVdAasjOgU',
723                 'ext': 'mp4',
724                 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
725                 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
726                 'duration': 142,
727                 'uploader': 'The Witcher',
728                 'uploader_id': 'WitcherGame',
729                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
730                 'upload_date': '20140605',
731                 'age_limit': 18,
732             },
733         },
734         # Age-gate video with encrypted signature
735         {
736             'url': 'https://www.youtube.com/watch?v=6kLq3WMV1nU',
737             'info_dict': {
738                 'id': '6kLq3WMV1nU',
739                 'ext': 'mp4',
740                 'title': 'Dedication To My Ex (Miss That) (Lyric Video)',
741                 'description': 'md5:33765bb339e1b47e7e72b5490139bb41',
742                 'duration': 246,
743                 'uploader': 'LloydVEVO',
744                 'uploader_id': 'LloydVEVO',
745                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/LloydVEVO',
746                 'upload_date': '20110629',
747                 'age_limit': 18,
748             },
749         },
750         # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
751         # YouTube Red ad is not captured for creator
752         {
753             'url': '__2ABJjxzNo',
754             'info_dict': {
755                 'id': '__2ABJjxzNo',
756                 'ext': 'mp4',
757                 'duration': 266,
758                 'upload_date': '20100430',
759                 'uploader_id': 'deadmau5',
760                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
761                 'creator': 'Dada Life, deadmau5',
762                 'description': 'md5:12c56784b8032162bb936a5f76d55360',
763                 'uploader': 'deadmau5',
764                 'title': 'Deadmau5 - Some Chords (HD)',
765                 'alt_title': 'This Machine Kills Some Chords',
766             },
767             'expected_warnings': [
768                 'DASH manifest missing',
769             ]
770         },
771         # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
772         {
773             'url': 'lqQg6PlCWgI',
774             'info_dict': {
775                 'id': 'lqQg6PlCWgI',
776                 'ext': 'mp4',
777                 'duration': 6085,
778                 'upload_date': '20150827',
779                 'uploader_id': 'olympic',
780                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
781                 'description': 'HO09  - Women -  GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
782                 'uploader': 'Olympic',
783                 'title': 'Hockey - Women -  GER-AUS - London 2012 Olympic Games',
784             },
785             'params': {
786                 'skip_download': 'requires avconv',
787             }
788         },
789         # Non-square pixels
790         {
791             'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
792             'info_dict': {
793                 'id': '_b-2C3KPAM0',
794                 'ext': 'mp4',
795                 'stretched_ratio': 16 / 9.,
796                 'duration': 85,
797                 'upload_date': '20110310',
798                 'uploader_id': 'AllenMeow',
799                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
800                 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
801                 'uploader': '孫ᄋᄅ',
802                 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
803             },
804         },
805         # url_encoded_fmt_stream_map is empty string
806         {
807             'url': 'qEJwOuvDf7I',
808             'info_dict': {
809                 'id': 'qEJwOuvDf7I',
810                 'ext': 'webm',
811                 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
812                 'description': '',
813                 'upload_date': '20150404',
814                 'uploader_id': 'spbelect',
815                 'uploader': 'Наблюдатели Петербурга',
816             },
817             'params': {
818                 'skip_download': 'requires avconv',
819             },
820             'skip': 'This live event has ended.',
821         },
822         # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
823         {
824             'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
825             'info_dict': {
826                 'id': 'FIl7x6_3R5Y',
827                 'ext': 'webm',
828                 'title': 'md5:7b81415841e02ecd4313668cde88737a',
829                 'description': 'md5:116377fd2963b81ec4ce64b542173306',
830                 'duration': 220,
831                 'upload_date': '20150625',
832                 'uploader_id': 'dorappi2000',
833                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
834                 'uploader': 'dorappi2000',
835                 'formats': 'mincount:31',
836             },
837             'skip': 'no longer applicable',
838         },
839         # DASH manifest with segment_list
840         {
841             'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
842             'md5': '8ce563a1d667b599d21064e982ab9e31',
843             'info_dict': {
844                 'id': 'CsmdDsKjzN8',
845                 'ext': 'mp4',
846                 'upload_date': '20150501',  # According to '<meta itemprop="datePublished"', but in other places it's 20150510
847                 'uploader': 'Airtek',
848                 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
849                 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
850                 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
851             },
852             'params': {
853                 'youtube_include_dash_manifest': True,
854                 'format': '135',  # bestvideo
855             },
856             'skip': 'This live event has ended.',
857         },
858         {
859             # Multifeed videos (multiple cameras), URL is for Main Camera
860             'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
861             'info_dict': {
862                 'id': 'jqWvoWXjCVs',
863                 'title': 'teamPGP: Rocket League Noob Stream',
864                 'description': 'md5:dc7872fb300e143831327f1bae3af010',
865             },
866             'playlist': [{
867                 'info_dict': {
868                     'id': 'jqWvoWXjCVs',
869                     'ext': 'mp4',
870                     'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
871                     'description': 'md5:dc7872fb300e143831327f1bae3af010',
872                     'duration': 7335,
873                     'upload_date': '20150721',
874                     'uploader': 'Beer Games Beer',
875                     'uploader_id': 'beergamesbeer',
876                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
877                     'license': 'Standard YouTube License',
878                 },
879             }, {
880                 'info_dict': {
881                     'id': '6h8e8xoXJzg',
882                     'ext': 'mp4',
883                     'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
884                     'description': 'md5:dc7872fb300e143831327f1bae3af010',
885                     'duration': 7337,
886                     'upload_date': '20150721',
887                     'uploader': 'Beer Games Beer',
888                     'uploader_id': 'beergamesbeer',
889                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
890                     'license': 'Standard YouTube License',
891                 },
892             }, {
893                 'info_dict': {
894                     'id': 'PUOgX5z9xZw',
895                     'ext': 'mp4',
896                     'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
897                     'description': 'md5:dc7872fb300e143831327f1bae3af010',
898                     'duration': 7337,
899                     'upload_date': '20150721',
900                     'uploader': 'Beer Games Beer',
901                     'uploader_id': 'beergamesbeer',
902                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
903                     'license': 'Standard YouTube License',
904                 },
905             }, {
906                 'info_dict': {
907                     'id': 'teuwxikvS5k',
908                     'ext': 'mp4',
909                     'title': 'teamPGP: Rocket League Noob Stream (zim)',
910                     'description': 'md5:dc7872fb300e143831327f1bae3af010',
911                     'duration': 7334,
912                     'upload_date': '20150721',
913                     'uploader': 'Beer Games Beer',
914                     'uploader_id': 'beergamesbeer',
915                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
916                     'license': 'Standard YouTube License',
917                 },
918             }],
919             'params': {
920                 'skip_download': True,
921             },
922             'skip': 'This video is not available.',
923         },
924         {
925             # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
926             'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
927             'info_dict': {
928                 'id': 'gVfLd0zydlo',
929                 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
930             },
931             'playlist_count': 2,
932             'skip': 'Not multifeed anymore',
933         },
934         {
935             'url': 'https://vid.plus/FlRa-iH7PGw',
936             'only_matching': True,
937         },
938         {
939             'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
940             'only_matching': True,
941         },
942         {
943             # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
944             # Also tests cut-off URL expansion in video description (see
945             # https://github.com/ytdl-org/youtube-dl/issues/1892,
946             # https://github.com/ytdl-org/youtube-dl/issues/8164)
947             'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
948             'info_dict': {
949                 'id': 'lsguqyKfVQg',
950                 'ext': 'mp4',
951                 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
952                 'alt_title': 'Dark Walk - Position Music',
953                 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
954                 'duration': 133,
955                 'upload_date': '20151119',
956                 'uploader_id': 'IronSoulElf',
957                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
958                 'uploader': 'IronSoulElf',
959                 'creator': 'Todd Haberman,  Daniel Law Heath and Aaron Kaplan',
960                 'track': 'Dark Walk - Position Music',
961                 'artist': 'Todd Haberman,  Daniel Law Heath and Aaron Kaplan',
962                 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
963             },
964             'params': {
965                 'skip_download': True,
966             },
967         },
968         {
969             # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
970             'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
971             'only_matching': True,
972         },
973         {
974             # Video with yt:stretch=17:0
975             'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
976             'info_dict': {
977                 'id': 'Q39EVAstoRM',
978                 'ext': 'mp4',
979                 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
980                 'description': 'md5:ee18a25c350637c8faff806845bddee9',
981                 'upload_date': '20151107',
982                 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
983                 'uploader': 'CH GAMER DROID',
984             },
985             'params': {
986                 'skip_download': True,
987             },
988             'skip': 'This video does not exist.',
989         },
990         {
991             # Video licensed under Creative Commons
992             'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
993             'info_dict': {
994                 'id': 'M4gD1WSo5mA',
995                 'ext': 'mp4',
996                 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
997                 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
998                 'duration': 721,
999                 'upload_date': '20150127',
1000                 'uploader_id': 'BerkmanCenter',
1001                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
1002                 'uploader': 'The Berkman Klein Center for Internet & Society',
1003                 'license': 'Creative Commons Attribution license (reuse allowed)',
1004             },
1005             'params': {
1006                 'skip_download': True,
1007             },
1008         },
1009         {
1010             # Channel-like uploader_url
1011             'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1012             'info_dict': {
1013                 'id': 'eQcmzGIKrzg',
1014                 'ext': 'mp4',
1015                 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
1016                 'description': 'md5:dda0d780d5a6e120758d1711d062a867',
1017                 'duration': 4060,
1018                 'upload_date': '20151119',
1019                 'uploader': 'Bernie Sanders',
1020                 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1021                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
1022                 'license': 'Creative Commons Attribution license (reuse allowed)',
1023             },
1024             'params': {
1025                 'skip_download': True,
1026             },
1027         },
1028         {
1029             'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1030             'only_matching': True,
1031         },
1032         {
1033             # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
1034             'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1035             'only_matching': True,
1036         },
1037         {
1038             # Rental video preview
1039             'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1040             'info_dict': {
1041                 'id': 'uGpuVWrhIzE',
1042                 'ext': 'mp4',
1043                 'title': 'Piku - Trailer',
1044                 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1045                 'upload_date': '20150811',
1046                 'uploader': 'FlixMatrix',
1047                 'uploader_id': 'FlixMatrixKaravan',
1048                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
1049                 'license': 'Standard YouTube License',
1050             },
1051             'params': {
1052                 'skip_download': True,
1053             },
1054             'skip': 'This video is not available.',
1055         },
1056         {
1057             # YouTube Red video with episode data
1058             'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1059             'info_dict': {
1060                 'id': 'iqKdEhx-dD4',
1061                 'ext': 'mp4',
1062                 'title': 'Isolation - Mind Field (Ep 1)',
1063                 'description': 'md5:46a29be4ceffa65b92d277b93f463c0f',
1064                 'duration': 2085,
1065                 'upload_date': '20170118',
1066                 'uploader': 'Vsauce',
1067                 'uploader_id': 'Vsauce',
1068                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
1069                 'series': 'Mind Field',
1070                 'season_number': 1,
1071                 'episode_number': 1,
1072             },
1073             'params': {
1074                 'skip_download': True,
1075             },
1076             'expected_warnings': [
1077                 'Skipping DASH manifest',
1078             ],
1079         },
1080         {
1081             # The following content has been identified by the YouTube community
1082             # as inappropriate or offensive to some audiences.
1083             'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1084             'info_dict': {
1085                 'id': '6SJNVb0GnPI',
1086                 'ext': 'mp4',
1087                 'title': 'Race Differences in Intelligence',
1088                 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1089                 'duration': 965,
1090                 'upload_date': '20140124',
1091                 'uploader': 'New Century Foundation',
1092                 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1093                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1094             },
1095             'params': {
1096                 'skip_download': True,
1097             },
1098         },
1099         {
1100             # itag 212
1101             'url': '1t24XAntNCY',
1102             'only_matching': True,
1103         },
1104         {
1105             # geo restricted to JP
1106             'url': 'sJL6WA-aGkQ',
1107             'only_matching': True,
1108         },
1109         {
1110             'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
1111             'only_matching': True,
1112         },
1113         {
1114             'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1115             'only_matching': True,
1116         },
1117         {
1118             # DRM protected
1119             'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1120             'only_matching': True,
1121         },
1122         {
1123             # Video with unsupported adaptive stream type formats
1124             'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1125             'info_dict': {
1126                 'id': 'Z4Vy8R84T1U',
1127                 'ext': 'mp4',
1128                 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1129                 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1130                 'duration': 433,
1131                 'upload_date': '20130923',
1132                 'uploader': 'Amelia Putri Harwita',
1133                 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1134                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1135                 'formats': 'maxcount:10',
1136             },
1137             'params': {
1138                 'skip_download': True,
1139                 'youtube_include_dash_manifest': False,
1140             },
1141             'skip': 'no longer applicable',
1142         },
1143         {
1144             # Youtube Music Auto-generated description
1145             'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1146             'info_dict': {
1147                 'id': 'MgNrAu2pzNs',
1148                 'ext': 'mp4',
1149                 'title': 'Voyeur Girl',
1150                 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1151                 'upload_date': '20190312',
1152                 'uploader': 'Stephen - Topic',
1153                 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1154                 'artist': 'Stephen',
1155                 'track': 'Voyeur Girl',
1156                 'album': 'it\'s too much love to know my dear',
1157                 'release_date': '20190313',
1158                 'release_year': 2019,
1159             },
1160             'params': {
1161                 'skip_download': True,
1162             },
1163         },
1164         {
1165             # Youtube Music Auto-generated description
1166             # Retrieve 'artist' field from 'Artist:' in video description
1167             # when it is present on youtube music video
1168             'url': 'https://www.youtube.com/watch?v=k0jLE7tTwjY',
1169             'info_dict': {
1170                 'id': 'k0jLE7tTwjY',
1171                 'ext': 'mp4',
1172                 'title': 'Latch Feat. Sam Smith',
1173                 'description': 'md5:3cb1e8101a7c85fcba9b4fb41b951335',
1174                 'upload_date': '20150110',
1175                 'uploader': 'Various Artists - Topic',
1176                 'uploader_id': 'UCNkEcmYdjrH4RqtNgh7BZ9w',
1177                 'artist': 'Disclosure',
1178                 'track': 'Latch Feat. Sam Smith',
1179                 'album': 'Latch Featuring Sam Smith',
1180                 'release_date': '20121008',
1181                 'release_year': 2012,
1182             },
1183             'params': {
1184                 'skip_download': True,
1185             },
1186         },
1187         {
1188             # Youtube Music Auto-generated description
1189             # handle multiple artists on youtube music video
1190             'url': 'https://www.youtube.com/watch?v=74qn0eJSjpA',
1191             'info_dict': {
1192                 'id': '74qn0eJSjpA',
1193                 'ext': 'mp4',
1194                 'title': 'Eastside',
1195                 'description': 'md5:290516bb73dcbfab0dcc4efe6c3de5f2',
1196                 'upload_date': '20180710',
1197                 'uploader': 'Benny Blanco - Topic',
1198                 'uploader_id': 'UCzqz_ksRu_WkIzmivMdIS7A',
1199                 'artist': 'benny blanco, Halsey, Khalid',
1200                 'track': 'Eastside',
1201                 'album': 'Eastside',
1202                 'release_date': '20180713',
1203                 'release_year': 2018,
1204             },
1205             'params': {
1206                 'skip_download': True,
1207             },
1208         },
1209         {
1210             # Youtube Music Auto-generated description
1211             # handle youtube music video with release_year and no release_date
1212             'url': 'https://www.youtube.com/watch?v=-hcAI0g-f5M',
1213             'info_dict': {
1214                 'id': '-hcAI0g-f5M',
1215                 'ext': 'mp4',
1216                 'title': 'Put It On Me',
1217                 'description': 'md5:f6422397c07c4c907c6638e1fee380a5',
1218                 'upload_date': '20180426',
1219                 'uploader': 'Matt Maeson - Topic',
1220                 'uploader_id': 'UCnEkIGqtGcQMLk73Kp-Q5LQ',
1221                 'artist': 'Matt Maeson',
1222                 'track': 'Put It On Me',
1223                 'album': 'The Hearse',
1224                 'release_date': None,
1225                 'release_year': 2018,
1226             },
1227             'params': {
1228                 'skip_download': True,
1229             },
1230         },
1231         {
1232             'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1233             'only_matching': True,
1234         },
1235         {
1236             # invalid -> valid video id redirection
1237             'url': 'DJztXj2GPfl',
1238             'info_dict': {
1239                 'id': 'DJztXj2GPfk',
1240                 'ext': 'mp4',
1241                 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1242                 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1243                 'upload_date': '20090125',
1244                 'uploader': 'Prochorowka',
1245                 'uploader_id': 'Prochorowka',
1246                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1247                 'artist': 'Panjabi MC',
1248                 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1249                 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1250             },
1251             'params': {
1252                 'skip_download': True,
1253             },
1254         }
1255     ]
1256
1257     def __init__(self, *args, **kwargs):
1258         super(YoutubeIE, self).__init__(*args, **kwargs)
1259         self._player_cache = {}
1260
1261     def report_video_info_webpage_download(self, video_id):
1262         """Report attempt to download video info webpage."""
1263         self.to_screen('%s: Downloading video info webpage' % video_id)
1264
1265     def report_information_extraction(self, video_id):
1266         """Report attempt to extract video information."""
1267         self.to_screen('%s: Extracting video information' % video_id)
1268
1269     def report_unavailable_format(self, video_id, format):
1270         """Report that a format is not available."""
1271         self.to_screen('%s: Format %s not available' % (video_id, format))
1272
1273     def report_rtmp_download(self):
1274         """Indicate the download will use the RTMP protocol."""
1275         self.to_screen('RTMP download detected')
1276
1277     def _signature_cache_id(self, example_sig):
1278         """ Return a string representation of a signature """
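     # e.g. a signature of the form 'ABCDE.FGHIJ.KL' maps to the cache id '5.5.2'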
1279         return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
1280
1281     @classmethod
1282     def _extract_player_info(cls, player_url):
1283         for player_re in cls._PLAYER_INFO_RE:
1284             id_m = re.search(player_re, player_url)
1285             if id_m:
1286                 break
1287         else:
1288             raise ExtractorError('Cannot identify player %r' % player_url)
1289         return id_m.group('ext'), id_m.group('id')
1290
1291     def _extract_signature_function(self, video_id, player_url, example_sig):
1292         player_type, player_id = self._extract_player_info(player_url)
1293
1294         # Read from filesystem cache
1295         func_id = '%s_%s_%s' % (
1296             player_type, player_id, self._signature_cache_id(example_sig))
1297         assert os.path.basename(func_id) == func_id
1298
1299         cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
1300         if cache_spec is not None:
1301             return lambda s: ''.join(s[i] for i in cache_spec)
1302
1303         download_note = (
1304             'Downloading player %s' % player_url
1305             if self._downloader.params.get('verbose') else
1306             'Downloading %s player %s' % (player_type, player_id)
1307         )
1308         if player_type == 'js':
1309             code = self._download_webpage(
1310                 player_url, video_id,
1311                 note=download_note,
1312                 errnote='Download of %s failed' % player_url)
1313             res = self._parse_sig_js(code)
1314         elif player_type == 'swf':
1315             urlh = self._request_webpage(
1316                 player_url, video_id,
1317                 note=download_note,
1318                 errnote='Download of %s failed' % player_url)
1319             code = urlh.read()
1320             res = self._parse_sig_swf(code)
1321         else:
1322             assert False, 'Invalid player type %r' % player_type
1323
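     # Probe the decipher function with a string of distinct code points; the
     # code point of each output character then records which input index it
     # came from, giving an index spec that can be cached and replayed.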
1324         test_string = ''.join(map(compat_chr, range(len(example_sig))))
1325         cache_res = res(test_string)
1326         cache_spec = [ord(c) for c in cache_res]
1327
1328         self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
1329         return res
1330
1331     def _print_sig_code(self, func, example_sig):
1332         def gen_sig_code(idxs):
1333             def _genslice(start, end, step):
1334                 starts = '' if start == 0 else str(start)
1335                 ends = (':%d' % (end + step)) if end + step >= 0 else ':'
1336                 steps = '' if step == 1 else (':%d' % step)
1337                 return 's[%s%s%s]' % (starts, ends, steps)
1338
1339             step = None
1340             # Squelch pyflakes warnings - start will be set when step is set
1341             start = '(Never used)'
1342             for i, prev in zip(idxs[1:], idxs[:-1]):
1343                 if step is not None:
1344                     if i - prev == step:
1345                         continue
1346                     yield _genslice(start, prev, step)
1347                     step = None
1348                     continue
1349                 if i - prev in [-1, 1]:
1350                     step = i - prev
1351                     start = prev
1352                     continue
1353                 else:
1354                     yield 's[%d]' % prev
1355             if step is None:
1356                 yield 's[%d]' % i
1357             else:
1358                 yield _genslice(start, i, step)
1359
1360         test_string = ''.join(map(compat_chr, range(len(example_sig))))
1361         cache_res = func(test_string)
1362         cache_spec = [ord(c) for c in cache_res]
1363         expr_code = ' + '.join(gen_sig_code(cache_spec))
1364         signature_id_tuple = '(%s)' % (
1365             ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
1366         code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
1367                 '    return %s\n') % (signature_id_tuple, expr_code)
1368         self.to_screen('Extracted signature function:\n' + code)
1369
1370     def _parse_sig_js(self, jscode):
1371         funcname = self._search_regex(
1372             (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1373              r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1374              r'\b(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
1375              r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
1376              # Obsolete patterns
1377              r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1378              r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
1379              r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1380              r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1381              r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1382              r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1383              r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1384              r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
1385             jscode, 'Initial JS player signature function name', group='sig')
1386
1387         jsi = JSInterpreter(jscode)
1388         initial_function = jsi.extract_function(funcname)
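     # extract_function returns a callable taking the JS argument list, hence
     # the signature is passed wrapped in a single-element list below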
1389         return lambda s: initial_function([s])
1390
1391     def _parse_sig_swf(self, file_contents):
1392         swfi = SWFInterpreter(file_contents)
1393         TARGET_CLASSNAME = 'SignatureDecipher'
1394         searched_class = swfi.extract_class(TARGET_CLASSNAME)
1395         initial_function = swfi.extract_function(searched_class, 'decipher')
1396         return lambda s: initial_function([s])
1397
1398     def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
1399         """Turn the encrypted s field into a working signature"""
1400
1401         if player_url is None:
1402             raise ExtractorError('Cannot decrypt signature without player_url')
1403
1404         if player_url.startswith('//'):
1405             player_url = 'https:' + player_url
1406         elif not re.match(r'https?://', player_url):
1407             player_url = compat_urlparse.urljoin(
1408                 'https://www.youtube.com', player_url)
1409         try:
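     # Decipher functions are cached per (player URL, signature length pattern)
     # so each player build only has to be fetched and parsed once per run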
1410             player_id = (player_url, self._signature_cache_id(s))
1411             if player_id not in self._player_cache:
1412                 func = self._extract_signature_function(
1413                     video_id, player_url, s
1414                 )
1415                 self._player_cache[player_id] = func
1416             func = self._player_cache[player_id]
1417             if self._downloader.params.get('youtube_print_sig_code'):
1418                 self._print_sig_code(func, s)
1419             return func(s)
1420         except Exception as e:
1421             tb = traceback.format_exc()
1422             raise ExtractorError(
1423                 'Signature extraction failed: ' + tb, cause=e)
1424
1425     def _get_subtitles(self, video_id, webpage):
1426         try:
1427             subs_doc = self._download_xml(
1428                 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
1429                 video_id, note=False)
1430         except ExtractorError as err:
1431             self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err))
1432             return {}
1433
1434         sub_lang_list = {}
1435         for track in subs_doc.findall('track'):
1436             lang = track.attrib['lang_code']
1437             if lang in sub_lang_list:
1438                 continue
1439             sub_formats = []
1440             for ext in self._SUBTITLE_FORMATS:
1441                 params = compat_urllib_parse_urlencode({
1442                     'lang': lang,
1443                     'v': video_id,
1444                     'fmt': ext,
1445                     'name': track.attrib['name'].encode('utf-8'),
1446                 })
1447                 sub_formats.append({
1448                     'url': 'https://www.youtube.com/api/timedtext?' + params,
1449                     'ext': ext,
1450                 })
1451             sub_lang_list[lang] = sub_formats
1452         if not sub_lang_list:
1453             self._downloader.report_warning('video doesn\'t have subtitles')
1454             return {}
1455         return sub_lang_list
1456
1457     def _get_ytplayer_config(self, video_id, webpage):
1458         patterns = (
1459             # User data may contain arbitrary character sequences that break
1460             # regex-based JSON extraction, e.g. when the data itself contains '};'
1461             # the second regex stops capturing too early. As a workaround the more
1462             # specific regex is tried first; proper quoted-string handling, to be
1463             # implemented in the future, will replace this workaround (see
1464             # https://github.com/ytdl-org/youtube-dl/issues/7468,
1465             # https://github.com/ytdl-org/youtube-dl/pull/7599)
1466             r';ytplayer\.config\s*=\s*({.+?});ytplayer',
1467             r';ytplayer\.config\s*=\s*({.+?});',
1468         )
1469         config = self._search_regex(
1470             patterns, webpage, 'ytplayer.config', default=None)
1471         if config:
1472             return self._parse_json(
1473                 uppercase_escape(config), video_id, fatal=False)
1474
1475     def _get_automatic_captions(self, video_id, webpage):
1476         """We need the webpage to get the captions URL; pass it as an
1477            argument to speed up the process."""
1478         self.to_screen('%s: Looking for automatic captions' % video_id)
1479         player_config = self._get_ytplayer_config(video_id, webpage)
1480         err_msg = 'Couldn\'t find automatic captions for %s' % video_id
1481         if not player_config:
1482             self._downloader.report_warning(err_msg)
1483             return {}
1484         try:
1485             args = player_config['args']
1486             caption_url = args.get('ttsurl')
1487             if caption_url:
1488                 timestamp = args['timestamp']
1489                 # We get the available subtitles
1490                 list_params = compat_urllib_parse_urlencode({
1491                     'type': 'list',
1492                     'tlangs': 1,
1493                     'asrs': 1,
1494                 })
1495                 list_url = caption_url + '&' + list_params
1496                 caption_list = self._download_xml(list_url, video_id)
1497                 original_lang_node = caption_list.find('track')
1498                 if original_lang_node is None:
1499                     self._downloader.report_warning('Video doesn\'t have automatic captions')
1500                     return {}
1501                 original_lang = original_lang_node.attrib['lang_code']
1502                 caption_kind = original_lang_node.attrib.get('kind', '')
1503
1504                 sub_lang_list = {}
1505                 for lang_node in caption_list.findall('target'):
1506                     sub_lang = lang_node.attrib['lang_code']
1507                     sub_formats = []
1508                     for ext in self._SUBTITLE_FORMATS:
1509                         params = compat_urllib_parse_urlencode({
1510                             'lang': original_lang,
1511                             'tlang': sub_lang,
1512                             'fmt': ext,
1513                             'ts': timestamp,
1514                             'kind': caption_kind,
1515                         })
1516                         sub_formats.append({
1517                             'url': caption_url + '&' + params,
1518                             'ext': ext,
1519                         })
1520                     sub_lang_list[sub_lang] = sub_formats
1521                 return sub_lang_list
1522
1523             def make_captions(sub_url, sub_langs):
1524                 parsed_sub_url = compat_urllib_parse_urlparse(sub_url)
1525                 caption_qs = compat_parse_qs(parsed_sub_url.query)
1526                 captions = {}
1527                 for sub_lang in sub_langs:
1528                     sub_formats = []
1529                     for ext in self._SUBTITLE_FORMATS:
1530                         caption_qs.update({
1531                             'tlang': [sub_lang],
1532                             'fmt': [ext],
1533                         })
1534                         sub_url = compat_urlparse.urlunparse(parsed_sub_url._replace(
1535                             query=compat_urllib_parse_urlencode(caption_qs, True)))
1536                         sub_formats.append({
1537                             'url': sub_url,
1538                             'ext': ext,
1539                         })
1540                     captions[sub_lang] = sub_formats
1541                 return captions
1542
1543             # New captions format as of 22.06.2017
1544             player_response = args.get('player_response')
1545             if player_response and isinstance(player_response, compat_str):
1546                 player_response = self._parse_json(
1547                     player_response, video_id, fatal=False)
1548                 if player_response:
1549                     renderer = player_response['captions']['playerCaptionsTracklistRenderer']
1550                     base_url = renderer['captionTracks'][0]['baseUrl']
1551                     sub_lang_list = []
1552                     for lang in renderer['translationLanguages']:
1553                         lang_code = lang.get('languageCode')
1554                         if lang_code:
1555                             sub_lang_list.append(lang_code)
1556                     return make_captions(base_url, sub_lang_list)
1557
1558             # Some videos don't provide ttsurl but rather caption_tracks and
1559             # caption_translation_languages (e.g. 20LmZk1hakA)
1560             # Not used anymore as of 22.06.2017
1561             caption_tracks = args['caption_tracks']
1562             caption_translation_languages = args['caption_translation_languages']
1563             caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
1564             sub_lang_list = []
1565             for lang in caption_translation_languages.split(','):
1566                 lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang))
1567                 sub_lang = lang_qs.get('lc', [None])[0]
1568                 if sub_lang:
1569                     sub_lang_list.append(sub_lang)
1570             return make_captions(caption_url, sub_lang_list)
1571         # An extractor error can be raised by the download process if there are
1572         # no automatic captions but there are subtitles
1573         except (KeyError, IndexError, ExtractorError):
1574             self._downloader.report_warning(err_msg)
1575             return {}
1576
1577     def _mark_watched(self, video_id, video_info, player_response):
1578         playback_url = url_or_none(try_get(
1579             player_response,
1580             lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']) or try_get(
1581             video_info, lambda x: x['videostats_playback_base_url'][0]))
1582         if not playback_url:
1583             return
1584         parsed_playback_url = compat_urlparse.urlparse(playback_url)
1585         qs = compat_urlparse.parse_qs(parsed_playback_url.query)
1586
1587         # cpn generation algorithm is reverse engineered from base.js.
1588         # In fact it works even with dummy cpn.
1589         CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
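     # '& 63' keeps only the low 6 bits of each random draw, i.e. an index into
     # the 64-character alphabet; 16 such characters make up the cpn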
1590         cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
1591
1592         qs.update({
1593             'ver': ['2'],
1594             'cpn': [cpn],
1595         })
1596         playback_url = compat_urlparse.urlunparse(
1597             parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
1598
1599         self._download_webpage(
1600             playback_url, video_id, 'Marking watched',
1601             'Unable to mark watched', fatal=False)
1602
1603     @staticmethod
1604     def _extract_urls(webpage):
1605         # Embedded YouTube player
1606         entries = [
1607             unescapeHTML(mobj.group('url'))
1608             for mobj in re.finditer(r'''(?x)
1609             (?:
1610                 <iframe[^>]+?src=|
1611                 data-video-url=|
1612                 <embed[^>]+?src=|
1613                 embedSWF\(?:\s*|
1614                 <object[^>]+data=|
1615                 new\s+SWFObject\(
1616             )
1617             (["\'])
1618                 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1619                 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
1620             \1''', webpage)]
1621
1622         # lazyYT YouTube embed
1623         entries.extend(list(map(
1624             unescapeHTML,
1625             re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))
1626
1627         # Wordpress "YouTube Video Importer" plugin
1628         matches = re.findall(r'''(?x)<div[^>]+
1629             class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
1630             data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
1631         entries.extend(m[-1] for m in matches)
1632
1633         return entries
1634
1635     @staticmethod
1636     def _extract_url(webpage):
1637         urls = YoutubeIE._extract_urls(webpage)
1638         return urls[0] if urls else None
1639
1640     @classmethod
1641     def extract_id(cls, url):
1642         mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
1643         if mobj is None:
1644             raise ExtractorError('Invalid URL: %s' % url)
1645         video_id = mobj.group(2)
1646         return video_id
1647
1648     @staticmethod
1649     def _extract_chapters(description, duration):
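     # Chapters are recovered from the legacy watch-page description HTML, where
     # each timestamp is rendered as a yt.www.watch.player.seekTo(...) link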
1650         if not description:
1651             return None
1652         chapter_lines = re.findall(
1653             r'(?:^|<br\s*/>)([^<]*<a[^>]+onclick=["\']yt\.www\.watch\.player\.seekTo[^>]+>(\d{1,2}:\d{1,2}(?::\d{1,2})?)</a>[^>]*)(?=$|<br\s*/>)',
1654             description)
1655         if not chapter_lines:
1656             return None
1657         chapters = []
1658         for next_num, (chapter_line, time_point) in enumerate(
1659                 chapter_lines, start=1):
1660             start_time = parse_duration(time_point)
1661             if start_time is None:
1662                 continue
1663             if start_time > duration:
1664                 break
1665             end_time = (duration if next_num == len(chapter_lines)
1666                         else parse_duration(chapter_lines[next_num][1]))
1667             if end_time is None:
1668                 continue
1669             if end_time > duration:
1670                 end_time = duration
1671             if start_time > end_time:
1672                 break
1673             chapter_title = re.sub(
1674                 r'<a[^>]+>[^<]+</a>', '', chapter_line).strip(' \t-')
1675             chapter_title = re.sub(r'\s+', ' ', chapter_title)
1676             chapters.append({
1677                 'start_time': start_time,
1678                 'end_time': end_time,
1679                 'title': chapter_title,
1680             })
1681         return chapters
1682
1683     def _real_extract(self, url):
1684         url, smuggled_data = unsmuggle_url(url, {})
1685
1686         proto = (
1687             'http' if self._downloader.params.get('prefer_insecure', False)
1688             else 'https')
1689
1690         start_time = None
1691         end_time = None
1692         parsed_url = compat_urllib_parse_urlparse(url)
1693         for component in [parsed_url.fragment, parsed_url.query]:
1694             query = compat_parse_qs(component)
1695             if start_time is None and 't' in query:
1696                 start_time = parse_duration(query['t'][0])
1697             if start_time is None and 'start' in query:
1698                 start_time = parse_duration(query['start'][0])
1699             if end_time is None and 'end' in query:
1700                 end_time = parse_duration(query['end'][0])
1701
1702         # Extract the original video URL from a redirecting URL (e.g. age verification) using the next_url parameter
1703         mobj = re.search(self._NEXT_URL_RE, url)
1704         if mobj:
1705             url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
1706         video_id = self.extract_id(url)
1707
1708         # Get video webpage
1709         url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
1710         video_webpage, urlh = self._download_webpage_handle(url, video_id)
1711
1712         qs = compat_parse_qs(compat_urllib_parse_urlparse(urlh.geturl()).query)
1713         video_id = qs.get('v', [None])[0] or video_id
1714
1715         # Attempt to extract SWF player URL
1716         mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
1717         if mobj is not None:
1718             player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1719         else:
1720             player_url = None
1721
1722         dash_mpds = []
1723
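     # Candidate DASH manifest URLs are collected from both the legacy
     # video_info dict and the newer player_response JSON; duplicates are skipped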
1724         def add_dash_mpd(video_info):
1725             dash_mpd = video_info.get('dashmpd')
1726             if dash_mpd and dash_mpd[0] not in dash_mpds:
1727                 dash_mpds.append(dash_mpd[0])
1728
1729         def add_dash_mpd_pr(pl_response):
1730             dash_mpd = url_or_none(try_get(
1731                 pl_response, lambda x: x['streamingData']['dashManifestUrl'],
1732                 compat_str))
1733             if dash_mpd and dash_mpd not in dash_mpds:
1734                 dash_mpds.append(dash_mpd)
1735
1736         is_live = None
1737         view_count = None
1738
1739         def extract_view_count(v_info):
1740             return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
1741
1742         def extract_player_response(player_response, video_id):
1743             pl_response = str_or_none(player_response)
1744             if not pl_response:
1745                 return
1746             pl_response = self._parse_json(pl_response, video_id, fatal=False)
1747             if isinstance(pl_response, dict):
1748                 add_dash_mpd_pr(pl_response)
1749                 return pl_response
1750
1751         player_response = {}
1752
1753         # Get video info
1754         video_info = {}
1755         embed_webpage = None
1756         if re.search(r'player-age-gate-content">', video_webpage) is not None:
1757             age_gate = True
1758             # We simulate access to the video from www.youtube.com/v/{video_id};
1759             # this can be viewed without logging in to YouTube
1760             url = proto + '://www.youtube.com/embed/%s' % video_id
1761             embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
1762             data = compat_urllib_parse_urlencode({
1763                 'video_id': video_id,
1764                 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1765                 'sts': self._search_regex(
1766                     r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
1767             })
1768             video_info_url = proto + '://www.youtube.com/get_video_info?' + data
1769             try:
1770                 video_info_webpage = self._download_webpage(
1771                     video_info_url, video_id,
1772                     note='Refetching age-gated info webpage',
1773                     errnote='unable to download video info webpage')
1774             except ExtractorError:
1775                 video_info_webpage = None
1776             if video_info_webpage:
1777                 video_info = compat_parse_qs(video_info_webpage)
1778                 pl_response = video_info.get('player_response', [None])[0]
1779                 player_response = extract_player_response(pl_response, video_id)
1780                 add_dash_mpd(video_info)
1781                 view_count = extract_view_count(video_info)
1782         else:
1783             age_gate = False
1784             # Try looking directly into the video webpage
1785             ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
1786             if ytplayer_config:
1787                 args = ytplayer_config['args']
1788                 if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
1789                     # Convert to the same format returned by compat_parse_qs
1790                     video_info = dict((k, [v]) for k, v in args.items())
1791                     add_dash_mpd(video_info)
1792                 # Rental video is not rented but a preview is available (e.g.
1793                 # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
1794                 # https://github.com/ytdl-org/youtube-dl/issues/10532)
1795                 if not video_info and args.get('ypc_vid'):
1796                     return self.url_result(
1797                         args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
1798                 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1799                     is_live = True
1800                 if not player_response:
1801                     player_response = extract_player_response(args.get('player_response'), video_id)
1802             if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
1803                 add_dash_mpd_pr(player_response)
1804
1805         def extract_unavailable_message():
1806             messages = []
1807             for tag, kind in (('h1', 'message'), ('div', 'submessage')):
1808                 msg = self._html_search_regex(
1809                     r'(?s)<{tag}[^>]+id=["\']unavailable-{kind}["\'][^>]*>(.+?)</{tag}>'.format(tag=tag, kind=kind),
1810                     video_webpage, 'unavailable %s' % kind, default=None)
1811                 if msg:
1812                     messages.append(msg)
1813             if messages:
1814                 return '\n'.join(messages)
1815
1816         if not video_info and not player_response:
1817             unavailable_message = extract_unavailable_message()
1818             if not unavailable_message:
1819                 unavailable_message = 'Unable to extract video data'
1820             raise ExtractorError(
1821                 'YouTube said: %s' % unavailable_message, expected=True, video_id=video_id)
1822
1823         if not isinstance(video_info, dict):
1824             video_info = {}
1825
1826         video_details = try_get(
1827             player_response, lambda x: x['videoDetails'], dict) or {}
1828
1829         video_title = video_info.get('title', [None])[0] or video_details.get('title')
1830         if not video_title:
1831             self._downloader.report_warning('Unable to extract video title')
1832             video_title = '_'
1833
1834         description_original = video_description = get_element_by_id("eow-description", video_webpage)
1835         if video_description:
1836
1837             def replace_url(m):
1838                 redir_url = compat_urlparse.urljoin(url, m.group(1))
1839                 parsed_redir_url = compat_urllib_parse_urlparse(redir_url)
1840                 if re.search(r'^(?:www\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)$', parsed_redir_url.netloc) and parsed_redir_url.path == '/redirect':
1841                     qs = compat_parse_qs(parsed_redir_url.query)
1842                     q = qs.get('q')
1843                     if q and q[0]:
1844                         return q[0]
1845                 return redir_url
1846
1847             description_original = video_description = re.sub(r'''(?x)
1848                 <a\s+
1849                     (?:[a-zA-Z-]+="[^"]*"\s+)*?
1850                     (?:title|href)="([^"]+)"\s+
1851                     (?:[a-zA-Z-]+="[^"]*"\s+)*?
1852                     class="[^"]*"[^>]*>
1853                 [^<]+\.{3}\s*
1854                 </a>
1855             ''', replace_url, video_description)
1856             video_description = clean_html(video_description)
1857         else:
1858             video_description = self._html_search_meta('description', video_webpage) or video_details.get('shortDescription')
1859
1860         if not smuggled_data.get('force_singlefeed', False):
1861             if not self._downloader.params.get('noplaylist'):
1862                 multifeed_metadata_list = try_get(
1863                     player_response,
1864                     lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
1865                     compat_str) or try_get(
1866                     video_info, lambda x: x['multifeed_metadata_list'][0], compat_str)
1867                 if multifeed_metadata_list:
1868                     entries = []
1869                     feed_ids = []
1870                     for feed in multifeed_metadata_list.split(','):
1871                         # Unquote should take place before split on comma (,) since textual
1872                         # fields may contain comma as well (see
1873                         # https://github.com/ytdl-org/youtube-dl/issues/8536)
1874                         feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
1875
1876                         def feed_entry(name):
1877                             return try_get(feed_data, lambda x: x[name][0], compat_str)
1878
1879                         feed_id = feed_entry('id')
1880                         if not feed_id:
1881                             continue
1882                         feed_title = feed_entry('title')
1883                         title = video_title
1884                         if feed_title:
1885                             title += ' (%s)' % feed_title
1886                         entries.append({
1887                             '_type': 'url_transparent',
1888                             'ie_key': 'Youtube',
1889                             'url': smuggle_url(
1890                                 '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
1891                                 {'force_singlefeed': True}),
1892                             'title': title,
1893                         })
1894                         feed_ids.append(feed_id)
1895                     self.to_screen(
1896                         'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
1897                         % (', '.join(feed_ids), video_id))
1898                     return self.playlist_result(entries, video_id, video_title, video_description)
1899             else:
1900                 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
1901
1902         if view_count is None:
1903             view_count = extract_view_count(video_info)
1904         if view_count is None and video_details:
1905             view_count = int_or_none(video_details.get('viewCount'))
1906
1907         if is_live is None:
1908             is_live = bool_or_none(video_details.get('isLive'))
1909
1910         # Check for "rental" videos
1911         if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
1912             raise ExtractorError('"rental" videos not supported. See https://github.com/ytdl-org/youtube-dl/issues/359 for more information.', expected=True)
1913
1914         def _extract_filesize(media_url):
1915             return int_or_none(self._search_regex(
1916                 r'\bclen[=/](\d+)', media_url, 'filesize', default=None))
1917
1918         streaming_formats = try_get(player_response, lambda x: x['streamingData']['formats'], list) or []
1919         streaming_formats.extend(try_get(player_response, lambda x: x['streamingData']['adaptiveFormats'], list) or [])
1920
1921         if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1922             self.report_rtmp_download()
1923             formats = [{
1924                 'format_id': '_rtmp',
1925                 'protocol': 'rtmp',
1926                 'url': video_info['conn'][0],
1927                 'player_url': player_url,
1928             }]
1929         elif not is_live and (streaming_formats or len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1):
1930             encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
1931             if 'rtmpe%3Dyes' in encoded_url_map:
1932                 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/ytdl-org/youtube-dl/issues/343 for more information.', expected=True)
1933             formats = []
1934             formats_spec = {}
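     # Each fmt_list entry is expected to look like '<itag>/<width>x<height>/...';
     # record the resolution so it can supplement formats lacking explicit dimensions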
1935             fmt_list = video_info.get('fmt_list', [''])[0]
1936             if fmt_list:
1937                 for fmt in fmt_list.split(','):
1938                     spec = fmt.split('/')
1939                     if len(spec) > 1:
1940                         width_height = spec[1].split('x')
1941                         if len(width_height) == 2:
1942                             formats_spec[spec[0]] = {
1943                                 'resolution': spec[1],
1944                                 'width': int_or_none(width_height[0]),
1945                                 'height': int_or_none(width_height[1]),
1946                             }
1947             for fmt in streaming_formats:
1948                 itag = str_or_none(fmt.get('itag'))
1949                 if not itag:
1950                     continue
1951                 quality = fmt.get('quality')
1952                 quality_label = fmt.get('qualityLabel') or quality
1953                 formats_spec[itag] = {
1954                     'asr': int_or_none(fmt.get('audioSampleRate')),
1955                     'filesize': int_or_none(fmt.get('contentLength')),
1956                     'format_note': quality_label,
1957                     'fps': int_or_none(fmt.get('fps')),
1958                     'height': int_or_none(fmt.get('height')),
1959                     # bitrate for itag 43 is always 2147483647
1960                     'tbr': float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000) if itag != '43' else None,
1961                     'width': int_or_none(fmt.get('width')),
1962                 }
1963
1964             for fmt in streaming_formats:
1965                 if fmt.get('drmFamilies') or fmt.get('drm_families'):
1966                     continue
1967                 url = url_or_none(fmt.get('url'))
1968
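     # Formats without a plain URL carry it, together with the encrypted 's'
     # parameter, inside the 'cipher'/'signatureCipher' query string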
1969                 if not url:
1970                     cipher = fmt.get('cipher') or fmt.get('signatureCipher')
1971                     if not cipher:
1972                         continue
1973                     url_data = compat_parse_qs(cipher)
1974                     url = url_or_none(try_get(url_data, lambda x: x['url'][0], compat_str))
1975                     if not url:
1976                         continue
1977                 else:
1978                     cipher = None
1979                     url_data = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
1980
1981                 stream_type = int_or_none(try_get(url_data, lambda x: x['stream_type'][0]))
1982                 # Unsupported FORMAT_STREAM_TYPE_OTF
1983                 if stream_type == 3:
1984                     continue
1985
1986                 format_id = fmt.get('itag') or url_data['itag'][0]
1987                 if not format_id:
1988                     continue
1989                 format_id = compat_str(format_id)
1990
1991                 if cipher:
1992                     if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
1993                         ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
1994                         jsplayer_url_json = self._search_regex(
1995                             ASSETS_RE,
1996                             embed_webpage if age_gate else video_webpage,
1997                             'JS player URL (1)', default=None)
1998                         if not jsplayer_url_json and not age_gate:
1999                             # We need the embed website after all
2000                             if embed_webpage is None:
2001                                 embed_url = proto + '://www.youtube.com/embed/%s' % video_id
2002                                 embed_webpage = self._download_webpage(
2003                                     embed_url, video_id, 'Downloading embed webpage')
2004                             jsplayer_url_json = self._search_regex(
2005                                 ASSETS_RE, embed_webpage, 'JS player URL')
2006
2007                         player_url = json.loads(jsplayer_url_json)
2008                         if player_url is None:
2009                             player_url_json = self._search_regex(
2010                                 r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
2011                                 video_webpage, 'age gate player URL')
2012                             player_url = json.loads(player_url_json)
2013
2014                     if 'sig' in url_data:
2015                         url += '&signature=' + url_data['sig'][0]
2016                     elif 's' in url_data:
2017                         encrypted_sig = url_data['s'][0]
2018
2019                         if self._downloader.params.get('verbose'):
2020                             if player_url is None:
2021                                 player_desc = 'unknown'
2022                             else:
2023                                 player_type, player_version = self._extract_player_info(player_url)
2024                                 player_desc = '%s player %s' % ('flash' if player_type == 'swf' else 'html5', player_version)
2025                             parts_sizes = self._signature_cache_id(encrypted_sig)
2026                             self.to_screen('{%s} signature length %s, %s' %
2027                                            (format_id, parts_sizes, player_desc))
2028
2029                         signature = self._decrypt_signature(
2030                             encrypted_sig, video_id, player_url, age_gate)
2031                         sp = try_get(url_data, lambda x: x['sp'][0], compat_str) or 'signature'
2032                         url += '&%s=%s' % (sp, signature)
2033                 if 'ratebypass' not in url:
2034                     url += '&ratebypass=yes'
2035
2036                 dct = {
2037                     'format_id': format_id,
2038                     'url': url,
2039                     'player_url': player_url,
2040                 }
2041                 if format_id in self._formats:
2042                     dct.update(self._formats[format_id])
2043                 if format_id in formats_spec:
2044                     dct.update(formats_spec[format_id])
2045
2046                 # Some itags are not included in the DASH manifest, so the corresponding
2047                 # formats lack metadata (see https://github.com/ytdl-org/youtube-dl/pull/5993).
2048                 # Try to extract metadata from the url_encoded_fmt_stream_map entry.
2049                 mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
2050                 width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
2051
2052                 if width is None:
2053                     width = int_or_none(fmt.get('width'))
2054                 if height is None:
2055                     height = int_or_none(fmt.get('height'))
2056
2057                 filesize = int_or_none(url_data.get(
2058                     'clen', [None])[0]) or _extract_filesize(url)
2059
2060                 quality = url_data.get('quality', [None])[0] or fmt.get('quality')
2061                 quality_label = url_data.get('quality_label', [None])[0] or fmt.get('qualityLabel')
2062
2063                 tbr = (float_or_none(url_data.get('bitrate', [None])[0], 1000)
2064                        or float_or_none(fmt.get('bitrate'), 1000)) if format_id != '43' else None
2065                 fps = int_or_none(url_data.get('fps', [None])[0]) or int_or_none(fmt.get('fps'))
2066
2067                 more_fields = {
2068                     'filesize': filesize,
2069                     'tbr': tbr,
2070                     'width': width,
2071                     'height': height,
2072                     'fps': fps,
2073                     'format_note': quality_label or quality,
2074                 }
2075                 for key, value in more_fields.items():
2076                     if value:
2077                         dct[key] = value
2078                 type_ = url_data.get('type', [None])[0] or fmt.get('mimeType')
2079                 if type_:
2080                     type_split = type_.split(';')
2081                     kind_ext = type_split[0].split('/')
2082                     if len(kind_ext) == 2:
2083                         kind, _ = kind_ext
2084                         dct['ext'] = mimetype2ext(type_split[0])
2085                         if kind in ('audio', 'video'):
2086                             codecs = None
2087                             for mobj in re.finditer(
2088                                     r'(?P<key>[a-zA-Z_-]+)=(?P<quote>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):
2089                                 if mobj.group('key') == 'codecs':
2090                                     codecs = mobj.group('val')
2091                                     break
2092                             if codecs:
2093                                 dct.update(parse_codecs(codecs))
2094                 if dct.get('acodec') == 'none' or dct.get('vcodec') == 'none':
2095                     dct['downloader_options'] = {
2096                         # Youtube throttles chunks >~10M
2097                         'http_chunk_size': 10485760,
2098                     }
2099                 formats.append(dct)
2100         else:
2101             manifest_url = (
2102                 url_or_none(try_get(
2103                     player_response,
2104                     lambda x: x['streamingData']['hlsManifestUrl'],
2105                     compat_str))
2106                 or url_or_none(try_get(
2107                     video_info, lambda x: x['hlsvp'][0], compat_str)))
2108             if manifest_url:
2109                 formats = []
2110                 m3u8_formats = self._extract_m3u8_formats(
2111                     manifest_url, video_id, 'mp4', fatal=False)
2112                 for a_format in m3u8_formats:
2113                     itag = self._search_regex(
2114                         r'/itag/(\d+)/', a_format['url'], 'itag', default=None)
2115                     if itag:
2116                         a_format['format_id'] = itag
2117                         if itag in self._formats:
2118                             dct = self._formats[itag].copy()
2119                             dct.update(a_format)
2120                             a_format = dct
2121                     a_format['player_url'] = player_url
2122                     # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
2123                     a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
2124                     formats.append(a_format)
2125             else:
2126                 error_message = extract_unavailable_message()
2127                 if not error_message:
2128                     error_message = clean_html(try_get(
2129                         player_response, lambda x: x['playabilityStatus']['reason'],
2130                         compat_str))
2131                 if not error_message:
2132                     error_message = clean_html(
2133                         try_get(video_info, lambda x: x['reason'][0], compat_str))
2134                 if error_message:
2135                     raise ExtractorError(error_message, expected=True)
2136                 raise ExtractorError('no conn, hlsvp, hlsManifestUrl or url_encoded_fmt_stream_map information found in video info')
2137
2138         # uploader
2139         video_uploader = try_get(
2140             video_info, lambda x: x['author'][0],
2141             compat_str) or str_or_none(video_details.get('author'))
2142         if video_uploader:
2143             video_uploader = compat_urllib_parse_unquote_plus(video_uploader)
2144         else:
2145             self._downloader.report_warning('unable to extract uploader name')
2146
2147         # uploader_id
2148         video_uploader_id = None
2149         video_uploader_url = None
2150         mobj = re.search(
2151             r'<link itemprop="url" href="(?P<uploader_url>https?://www\.youtube\.com/(?:user|channel)/(?P<uploader_id>[^"]+))">',
2152             video_webpage)
2153         if mobj is not None:
2154             video_uploader_id = mobj.group('uploader_id')
2155             video_uploader_url = mobj.group('uploader_url')
2156         else:
2157             self._downloader.report_warning('unable to extract uploader nickname')
2158
2159         channel_id = (
2160             str_or_none(video_details.get('channelId'))
2161             or self._html_search_meta(
2162                 'channelId', video_webpage, 'channel id', default=None)
2163             or self._search_regex(
2164                 r'data-channel-external-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
2165                 video_webpage, 'channel id', default=None, group='id'))
2166         channel_url = 'http://www.youtube.com/channel/%s' % channel_id if channel_id else None
2167
2168         # thumbnail image
2169         # We try first to get a high quality image:
2170         m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
2171                             video_webpage, re.DOTALL)
2172         if m_thumb is not None:
2173             video_thumbnail = m_thumb.group(1)
2174         elif 'thumbnail_url' not in video_info:
2175             self._downloader.report_warning('unable to extract video thumbnail')
2176             video_thumbnail = None
2177         else:   # don't panic if we can't find it
2178             video_thumbnail = compat_urllib_parse_unquote_plus(video_info['thumbnail_url'][0])
2179
2180         # upload date
2181         upload_date = self._html_search_meta(
2182             'datePublished', video_webpage, 'upload date', default=None)
2183         if not upload_date:
2184             upload_date = self._search_regex(
2185                 [r'(?s)id="eow-date.*?>(.*?)</span>',
2186                  r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'],
2187                 video_webpage, 'upload date', default=None)
2188         upload_date = unified_strdate(upload_date)
2189
2190         video_license = self._html_search_regex(
2191             r'<h4[^>]+class="title"[^>]*>\s*License\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li',
2192             video_webpage, 'license', default=None)
2193
2194         m_music = re.search(
2195             r'''(?x)
2196                 <h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*
2197                 <ul[^>]*>\s*
2198                 <li>(?P<title>.+?)
2199                 by (?P<creator>.+?)
2200                 (?:
2201                     \(.+?\)|
2202                     <a[^>]*
2203                         (?:
2204                             \bhref=["\']/red[^>]*>|             # drop possible
2205                             >\s*Listen ad-free with YouTube Red # YouTube Red ad
2206                         )
2207                     .*?
2208                 )?</li
2209             ''',
2210             video_webpage)
2211         if m_music:
2212             video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
2213             video_creator = clean_html(m_music.group('creator'))
2214         else:
2215             video_alt_title = video_creator = None
2216
2217         def extract_meta(field):
2218             return self._html_search_regex(
2219                 r'<h4[^>]+class="title"[^>]*>\s*%s\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li>\s*' % field,
2220                 video_webpage, field, default=None)
2221
2222         track = extract_meta('Song')
2223         artist = extract_meta('Artist')
2224         album = extract_meta('Album')
2225
2226         # Youtube Music Auto-generated description
2227         release_date = release_year = None
2228         if video_description:
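                 # Auto-generated YouTube Music descriptions look roughly like:
                 #   Provided to YouTube by <label>
                 #   <track> · <artist>[ · <artist> ...]
                 #   <album>
                 #   ... ℗ <release year> ... Released on: <YYYY-MM-DD> ... Artist: <name>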
2229             mobj = re.search(r'(?s)Provided to YouTube by [^\n]+\n+(?P<track>[^·]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?', video_description)
2230             if mobj:
2231                 if not track:
2232                     track = mobj.group('track').strip()
2233                 if not artist:
2234                     artist = mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·'))
2235                 if not album:
2236                     album = mobj.group('album').strip()
2237                 release_year = mobj.group('release_year')
2238                 release_date = mobj.group('release_date')
2239                 if release_date:
2240                     release_date = release_date.replace('-', '')
2241                     if not release_year:
2242                         release_year = int(release_date[:4])
2243                 if release_year:
2244                     release_year = int(release_year)
2245
2246         m_episode = re.search(
2247             r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
2248             video_webpage)
2249         if m_episode:
2250             series = unescapeHTML(m_episode.group('series'))
2251             season_number = int(m_episode.group('season'))
2252             episode_number = int(m_episode.group('episode'))
2253         else:
2254             series = season_number = episode_number = None
2255
2256         m_cat_container = self._search_regex(
2257             r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
2258             video_webpage, 'categories', default=None)
2259         if m_cat_container:
2260             category = self._html_search_regex(
2261                 r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
2262                 default=None)
2263             video_categories = None if category is None else [category]
2264         else:
2265             video_categories = None
2266
2267         video_tags = [
2268             unescapeHTML(m.group('content'))
2269             for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
2270
2271         def _extract_count(count_name):
2272             return str_to_int(self._search_regex(
2273                 r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>'
2274                 % re.escape(count_name),
2275                 video_webpage, count_name, default=None))
2276
2277         like_count = _extract_count('like')
2278         dislike_count = _extract_count('dislike')
2279
2280         if view_count is None:
2281             view_count = str_to_int(self._search_regex(
2282                 r'<[^>]+class=["\']watch-view-count[^>]+>\s*([\d,\s]+)', video_webpage,
2283                 'view count', default=None))
2284
2285         average_rating = (
2286             float_or_none(video_details.get('averageRating'))
2287             or try_get(video_info, lambda x: float_or_none(x['avg_rating'][0])))
2288
2289         # subtitles
2290         video_subtitles = self.extract_subtitles(video_id, video_webpage)
2291         automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
2292
2293         video_duration = try_get(
2294             video_info, lambda x: int_or_none(x['length_seconds'][0]))
2295         if not video_duration:
2296             video_duration = int_or_none(video_details.get('lengthSeconds'))
2297         if not video_duration:
2298             video_duration = parse_duration(self._html_search_meta(
2299                 'duration', video_webpage, 'video duration'))
2300
2301         # annotations
2302         video_annotations = None
2303         if self._downloader.params.get('writeannotations', False):
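                 # Annotations are downloaded from the invideoUrl advertised in the player
                 # response, POSTing the page's XSRF token as a form field.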
2304             xsrf_token = self._search_regex(
2305                 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>[A-Za-z0-9+/=]+)\2',
2306                 video_webpage, 'xsrf token', group='xsrf_token', fatal=False)
2307             invideo_url = try_get(
2308                 player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
2309             if xsrf_token and invideo_url:
2310                 xsrf_field_name = self._search_regex(
2311                     r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
2312                     video_webpage, 'xsrf field name',
2313                     group='xsrf_field_name', default='session_token')
2314                 video_annotations = self._download_webpage(
2315                     self._proto_relative_url(invideo_url),
2316                     video_id, note='Downloading annotations',
2317                     errnote='Unable to download video annotations', fatal=False,
2318                     data=urlencode_postdata({xsrf_field_name: xsrf_token}))
2319
2320         chapters = self._extract_chapters(description_original, video_duration)
2321
2322         # Look for the DASH manifest
2323         if self._downloader.params.get('youtube_include_dash_manifest', True):
2324             dash_mpd_fatal = True
2325             for mpd_url in dash_mpds:
2326                 dash_formats = {}
2327                 try:
2328                     def decrypt_sig(mobj):
2329                         s = mobj.group(1)
2330                         dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
2331                         return '/signature/%s' % dec_s
2332
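                         # The manifest URL may embed an encrypted signature as /s/<sig>;
                         # rewrite it to the decrypted /signature/<sig> form before fetching.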
2333                     mpd_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, mpd_url)
2334
2335                     for df in self._extract_mpd_formats(
2336                             mpd_url, video_id, fatal=dash_mpd_fatal,
2337                             formats_dict=self._formats):
2338                         if not df.get('filesize'):
2339                             df['filesize'] = _extract_filesize(df['url'])
2340                         # Do not overwrite a DASH format found in a previous DASH manifest
2341                         if df['format_id'] not in dash_formats:
2342                             dash_formats[df['format_id']] = df
2343                         # Additional DASH manifests may end up in HTTP Error 403, therefore
2344                         # allow them to fail without a bug report message if some DASH
2345                         # manifest has already succeeded. This is a temporary workaround to
2346                         # reduce the burst of bug reports until we figure out the reason and
2347                         # whether it can be fixed at all.
2348                         dash_mpd_fatal = False
2349                 except (ExtractorError, KeyError) as e:
2350                     self.report_warning(
2351                         'Skipping DASH manifest: %r' % e, video_id)
2352                 if dash_formats:
2353                     # Remove the formats found through non-DASH extraction; they
2354                     # contain less info and can be wrong because we use fixed
2355                     # values (for example the resolution). See
2356                     # https://github.com/ytdl-org/youtube-dl/issues/5774 for an
2357                     # example.
2358                     formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
2359                     formats.extend(dash_formats.values())
2360
2361         # Check for malformed aspect ratio
2362         stretched_m = re.search(
2363             r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
2364             video_webpage)
2365         if stretched_m:
2366             w = float(stretched_m.group('w'))
2367             h = float(stretched_m.group('h'))
2368             # yt:stretch may hold invalid ratio data (e.g. for Q39EVAstoRM ratio is 17:0).
2369             # We will only process correct ratios.
2370             if w > 0 and h > 0:
2371                 ratio = w / h
2372                 for f in formats:
2373                     if f.get('vcodec') != 'none':
2374                         f['stretched_ratio'] = ratio
2375
2376         if not formats:
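                 # No playable formats: surface YouTube's stated reason (geo restriction,
                 # invalid parameters) or report DRM protection.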
2377             if 'reason' in video_info:
2378                 if 'The uploader has not made this video available in your country.' in video_info['reason']:
2379                     regions_allowed = self._html_search_meta(
2380                         'regionsAllowed', video_webpage, default=None)
2381                     countries = regions_allowed.split(',') if regions_allowed else None
2382                     self.raise_geo_restricted(
2383                         msg=video_info['reason'][0], countries=countries)
2384                 reason = video_info['reason'][0]
2385                 if 'Invalid parameters' in reason:
2386                     unavailable_message = extract_unavailable_message()
2387                     if unavailable_message:
2388                         reason = unavailable_message
2389                 raise ExtractorError(
2390                     'YouTube said: %s' % reason,
2391                     expected=True, video_id=video_id)
2392             if video_info.get('license_info') or try_get(player_response, lambda x: x['streamingData']['licenseInfos']):
2393                 raise ExtractorError('This video is DRM protected.', expected=True)
2394
2395         self._sort_formats(formats)
2396
2397         self.mark_watched(video_id, video_info, player_response)
2398
2399         return {
2400             'id': video_id,
2401             'uploader': video_uploader,
2402             'uploader_id': video_uploader_id,
2403             'uploader_url': video_uploader_url,
2404             'channel_id': channel_id,
2405             'channel_url': channel_url,
2406             'upload_date': upload_date,
2407             'license': video_license,
2408             'creator': video_creator or artist,
2409             'title': video_title,
2410             'alt_title': video_alt_title or track,
2411             'thumbnail': video_thumbnail,
2412             'description': video_description,
2413             'categories': video_categories,
2414             'tags': video_tags,
2415             'subtitles': video_subtitles,
2416             'automatic_captions': automatic_captions,
2417             'duration': video_duration,
2418             'age_limit': 18 if age_gate else 0,
2419             'annotations': video_annotations,
2420             'chapters': chapters,
2421             'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
2422             'view_count': view_count,
2423             'like_count': like_count,
2424             'dislike_count': dislike_count,
2425             'average_rating': average_rating,
2426             'formats': formats,
2427             'is_live': is_live,
2428             'start_time': start_time,
2429             'end_time': end_time,
2430             'series': series,
2431             'season_number': season_number,
2432             'episode_number': episode_number,
2433             'track': track,
2434             'artist': artist,
2435             'album': album,
2436             'release_date': release_date,
2437             'release_year': release_year,
2438         }
2439
2440
2441 class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
2442     IE_DESC = 'YouTube.com playlists'
2443     _VALID_URL = r"""(?x)(?:
2444                         (?:https?://)?
2445                         (?:\w+\.)?
2446                         (?:
2447                             (?:
2448                                 youtube(?:kids)?\.com|
2449                                 invidio\.us
2450                             )
2451                             /
2452                             (?:
2453                                (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/(?:videoseries|[0-9A-Za-z_-]{11}))
2454                                \? (?:.*?[&;])*? (?:p|a|list)=
2455                             |  p/
2456                             )|
2457                             youtu\.be/[0-9A-Za-z_-]{11}\?.*?\blist=
2458                         )
2459                         (
2460                             (?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)?[0-9A-Za-z-_]{10,}
2461                             # Top tracks; they can also include dots
2462                             |(?:MC)[\w\.]*
2463                         )
2464                         .*
2465                      |
2466                         (%(playlist_id)s)
2467                      )""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
2468     _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
2469     _VIDEO_RE_TPL = r'href="\s*/watch\?v=%s(?:&amp;(?:[^"]*?index=(?P<index>\d+))?(?:[^>]+>(?P<title>[^<]+))?)?'
2470     _VIDEO_RE = _VIDEO_RE_TPL % r'(?P<id>[0-9A-Za-z_-]{11})'
2471     IE_NAME = 'youtube:playlist'
2472     _TESTS = [{
2473         'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
2474         'info_dict': {
2475             'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2476             'uploader': 'Sergey M.',
2477             'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
2478             'title': 'youtube-dl public playlist',
2479         },
2480         'playlist_count': 1,
2481     }, {
2482         'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
2483         'info_dict': {
2484             'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2485             'uploader': 'Sergey M.',
2486             'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
2487             'title': 'youtube-dl empty playlist',
2488         },
2489         'playlist_count': 0,
2490     }, {
2491         'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 also appears twice in this list.',
2492         'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2493         'info_dict': {
2494             'title': '29C3: Not my department',
2495             'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2496             'uploader': 'Christiaan008',
2497             'uploader_id': 'ChRiStIaAn008',
2498         },
2499         'playlist_count': 96,
2500     }, {
2501         'note': 'issue #673',
2502         'url': 'PLBB231211A4F62143',
2503         'info_dict': {
2504             'title': '[OLD]Team Fortress 2 (Class-based LP)',
2505             'id': 'PLBB231211A4F62143',
2506             'uploader': 'Wickydoo',
2507             'uploader_id': 'Wickydoo',
2508         },
2509         'playlist_mincount': 26,
2510     }, {
2511         'note': 'Large playlist',
2512         'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
2513         'info_dict': {
2514             'title': 'Uploads from Cauchemar',
2515             'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
2516             'uploader': 'Cauchemar',
2517             'uploader_id': 'Cauchemar89',
2518         },
2519         'playlist_mincount': 799,
2520     }, {
2521         'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
2522         'info_dict': {
2523             'title': 'YDL_safe_search',
2524             'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
2525         },
2526         'playlist_count': 2,
2527         'skip': 'This playlist is private',
2528     }, {
2529         'note': 'embedded',
2530         'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
2531         'playlist_count': 4,
2532         'info_dict': {
2533             'title': 'JODA15',
2534             'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
2535             'uploader': 'milan',
2536             'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
2537         }
2538     }, {
2539         'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
2540         'playlist_mincount': 485,
2541         'info_dict': {
2542             'title': '2018 Chinese New Singles (11/6 updated)',
2543             'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
2544             'uploader': 'LBK',
2545             'uploader_id': 'sdragonfang',
2546         }
2547     }, {
2548         'note': 'Embedded SWF player',
2549         'url': 'https://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
2550         'playlist_count': 4,
2551         'info_dict': {
2552             'title': 'JODA7',
2553             'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ',
2554         },
2555         'skip': 'This playlist does not exist',
2556     }, {
2557         'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
2558         'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
2559         'info_dict': {
2560             'title': 'Uploads from Interstellar Movie',
2561             'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
2562             'uploader': 'Interstellar Movie',
2563             'uploader_id': 'InterstellarMovie1',
2564         },
2565         'playlist_mincount': 21,
2566     }, {
2567         # Playlist URL that does not actually serve a playlist
2568         'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
2569         'info_dict': {
2570             'id': 'FqZTN594JQw',
2571             'ext': 'webm',
2572             'title': "Smiley's People 01 detective, Adventure Series, Action",
2573             'uploader': 'STREEM',
2574             'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
2575             'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
2576             'upload_date': '20150526',
2577             'license': 'Standard YouTube License',
2578             'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
2579             'categories': ['People & Blogs'],
2580             'tags': list,
2581             'view_count': int,
2582             'like_count': int,
2583             'dislike_count': int,
2584         },
2585         'params': {
2586             'skip_download': True,
2587         },
2588         'skip': 'This video is not available.',
2589         'add_ie': [YoutubeIE.ie_key()],
2590     }, {
2591         'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
2592         'info_dict': {
2593             'id': 'yeWKywCrFtk',
2594             'ext': 'mp4',
2595             'title': 'Small Scale Baler and Braiding Rugs',
2596             'uploader': 'Backus-Page House Museum',
2597             'uploader_id': 'backuspagemuseum',
2598             'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
2599             'upload_date': '20161008',
2600             'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
2601             'categories': ['Nonprofits & Activism'],
2602             'tags': list,
2603             'like_count': int,
2604             'dislike_count': int,
2605         },
2606         'params': {
2607             'noplaylist': True,
2608             'skip_download': True,
2609         },
2610     }, {
2611         # https://github.com/ytdl-org/youtube-dl/issues/21844
2612         'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2613         'info_dict': {
2614             'title': 'Data Analysis with Dr Mike Pound',
2615             'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2616             'uploader_id': 'Computerphile',
2617             'uploader': 'Computerphile',
2618         },
2619         'playlist_mincount': 11,
2620     }, {
2621         'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
2622         'only_matching': True,
2623     }, {
2624         'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
2625         'only_matching': True,
2626     }, {
2627         # music album playlist
2628         'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
2629         'only_matching': True,
2630     }, {
2631         'url': 'https://invidio.us/playlist?list=PLDIoUOhQQPlXr63I_vwF9GD8sAKh77dWU',
2632         'only_matching': True,
2633     }, {
2634         'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
2635         'only_matching': True,
2636     }]
2637
2638     def _real_initialize(self):
2639         self._login()
2640
2641     def extract_videos_from_page(self, page):
2642         ids_in_page = []
2643         titles_in_page = []
2644
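             # Primary extraction: parse elements carrying a data-video-id attribute,
             # which also expose the title via data-title.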
2645         for item in re.findall(
2646                 r'(<[^>]*\bdata-video-id\s*=\s*["\'][0-9A-Za-z_-]{11}[^>]+>)', page):
2647             attrs = extract_attributes(item)
2648             video_id = attrs['data-video-id']
2649             video_title = unescapeHTML(attrs.get('data-title'))
2650             if video_title:
2651                 video_title = video_title.strip()
2652             ids_in_page.append(video_id)
2653             titles_in_page.append(video_title)
2654
2655         # Fallback with old _VIDEO_RE
2656         self.extract_videos_from_page_impl(
2657             self._VIDEO_RE, page, ids_in_page, titles_in_page)
2658
2659         # Relaxed fallbacks
2660         self.extract_videos_from_page_impl(
2661             r'href="\s*/watch\?v\s*=\s*(?P<id>[0-9A-Za-z_-]{11})', page,
2662             ids_in_page, titles_in_page)
2663         self.extract_videos_from_page_impl(
2664             r'data-video-ids\s*=\s*["\'](?P<id>[0-9A-Za-z_-]{11})', page,
2665             ids_in_page, titles_in_page)
2666
2667         return zip(ids_in_page, titles_in_page)
2668
2669     def _extract_mix(self, playlist_id):
2670         # Mixes are generated from a single video;
2671         # the playlist id is just 'RD' + video_id
2672         ids = []
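             # The walk is seeded with the last 11 characters of the playlist id
             # (the video id for 'RD' mixes).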
2673         last_id = playlist_id[-11:]
2674         for n in itertools.count(1):
2675             url = 'https://youtube.com/watch?v=%s&list=%s' % (last_id, playlist_id)
2676             webpage = self._download_webpage(
2677                 url, playlist_id, 'Downloading page {0} of Youtube mix'.format(n))
2678             new_ids = orderedSet(re.findall(
2679                 r'''(?xs)data-video-username=".*?".*?
2680                            href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id),
2681                 webpage))
2682             # Fetch new pages until all the videos are repeated; it seems that
2683             # there are always 51 unique videos.
2684             new_ids = [_id for _id in new_ids if _id not in ids]
2685             if not new_ids:
2686                 break
2687             ids.extend(new_ids)
2688             last_id = ids[-1]
2689
2690         url_results = self._ids_to_results(ids)
2691
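             # Try the known class names under which the mix title may be rendered.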
2692         search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage)
2693         title_span = (
2694             search_title('playlist-title')
2695             or search_title('title long-title')
2696             or search_title('title'))
2697         title = clean_html(title_span)
2698
2699         return self.playlist_result(url_results, playlist_id, title)
2700
2701     def _extract_playlist(self, playlist_id):
2702         url = self._TEMPLATE_URL % playlist_id
2703         page = self._download_webpage(url, playlist_id)
2704
2705         # The yt-alert-message now has a tabindex attribute (see https://github.com/ytdl-org/youtube-dl/issues/11604)
2706         for match in re.findall(r'<div class="yt-alert-message"[^>]*>([^<]+)</div>', page):
2707             match = match.strip()
2708             # Check if the playlist exists or is private
2709             mobj = re.match(r'[^<]*(?:The|This) playlist (?P<reason>does not exist|is private)[^<]*', match)
2710             if mobj:
2711                 reason = mobj.group('reason')
2712                 message = 'This playlist %s' % reason
2713                 if 'private' in reason:
2714                     message += ', use --username or --netrc to access it'
2715                 message += '.'
2716                 raise ExtractorError(message, expected=True)
2717             elif re.match(r'[^<]*Invalid parameters[^<]*', match):
2718                 raise ExtractorError(
2719                     'Invalid parameters. Maybe URL is incorrect.',
2720                     expected=True)
2721             elif re.match(r'[^<]*Choose your language[^<]*', match):
2722                 continue
2723             else:
2724                 self.report_warning('Youtube gives an alert message: ' + match)
2725
2726         playlist_title = self._html_search_regex(
2727             r'(?s)<h1 class="pl-header-title[^"]*"[^>]*>\s*(.*?)\s*</h1>',
2728             page, 'title', default=None)
2729
2730         _UPLOADER_BASE = r'class=["\']pl-header-details[^>]+>\s*<li>\s*<a[^>]+\bhref='
2731         uploader = self._html_search_regex(
2732             r'%s["\']/(?:user|channel)/[^>]+>([^<]+)' % _UPLOADER_BASE,
2733             page, 'uploader', default=None)
2734         mobj = re.search(
2735             r'%s(["\'])(?P<path>/(?:user|channel)/(?P<uploader_id>.+?))\1' % _UPLOADER_BASE,
2736             page)
2737         if mobj:
2738             uploader_id = mobj.group('uploader_id')
2739             uploader_url = compat_urlparse.urljoin(url, mobj.group('path'))
2740         else:
2741             uploader_id = uploader_url = None
2742
2743         has_videos = True
2744
2745         if not playlist_title:
2746             try:
2747                 # Some playlist URLs don't actually serve a playlist (e.g.
2748                 # https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4)
2749                 next(self._entries(page, playlist_id))
2750             except StopIteration:
2751                 has_videos = False
2752
2753         playlist = self.playlist_result(
2754             self._entries(page, playlist_id), playlist_id, playlist_title)
2755         playlist.update({
2756             'uploader': uploader,
2757             'uploader_id': uploader_id,
2758             'uploader_url': uploader_url,
2759         })
2760
2761         return has_videos, playlist
2762
2763     def _check_download_just_video(self, url, playlist_id):
2764         # Check if it's a video-specific URL
2765         query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
2766         video_id = query_dict.get('v', [None])[0] or self._search_regex(
2767             r'(?:(?:^|//)youtu\.be/|youtube\.com/embed/(?!videoseries))([0-9A-Za-z_-]{11})', url,
2768             'video id', default=None)
2769         if video_id:
2770             if self._downloader.params.get('noplaylist'):
2771                 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
2772                 return video_id, self.url_result(video_id, 'Youtube', video_id=video_id)
2773             else:
2774                 self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
2775                 return video_id, None
2776         return None, None
2777
2778     def _real_extract(self, url):
2779         # Extract playlist id
2780         mobj = re.match(self._VALID_URL, url)
2781         if mobj is None:
2782             raise ExtractorError('Invalid URL: %s' % url)
2783         playlist_id = mobj.group(1) or mobj.group(2)
2784
2785         video_id, video = self._check_download_just_video(url, playlist_id)
2786         if video:
2787             return video
2788
2789         if playlist_id.startswith(('RD', 'UL', 'PU')):
2790             # Mixes require a custom extraction process
2791             return self._extract_mix(playlist_id)
2792
2793         has_videos, playlist = self._extract_playlist(playlist_id)
2794         if has_videos or not video_id:
2795             return playlist
2796
2797         # Some playlist URLs don't actually serve a playlist (see
2798         # https://github.com/ytdl-org/youtube-dl/issues/10537).
2799         # Fallback to plain video extraction if there is a video id
2800         # along with playlist id.
2801         return self.url_result(video_id, 'Youtube', video_id=video_id)
2802
2803
2804 class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
2805     IE_DESC = 'YouTube.com channels'
2806     _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie|kids)?\.com|(?:www\.)?invidio\.us)/channel/(?P<id>[0-9A-Za-z_-]+)'
2807     _TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos'
2808     _VIDEO_RE = r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?'
2809     IE_NAME = 'youtube:channel'
2810     _TESTS = [{
2811         'note': 'paginated channel',
2812         'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
2813         'playlist_mincount': 91,
2814         'info_dict': {
2815             'id': 'UUKfVa3S1e4PHvxWcwyMMg8w',
2816             'title': 'Uploads from lex will',
2817             'uploader': 'lex will',
2818             'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2819         }
2820     }, {
2821         'note': 'Age restricted channel',
2822         # from https://www.youtube.com/user/DeusExOfficial
2823         'url': 'https://www.youtube.com/channel/UCs0ifCMCm1icqRbqhUINa0w',
2824         'playlist_mincount': 64,
2825         'info_dict': {
2826             'id': 'UUs0ifCMCm1icqRbqhUINa0w',
2827             'title': 'Uploads from Deus Ex',
2828             'uploader': 'Deus Ex',
2829             'uploader_id': 'DeusExOfficial',
2830         },
2831     }, {
2832         'url': 'https://invidio.us/channel/UC23qupoDRn9YOAVzeoxjOQA',
2833         'only_matching': True,
2834     }, {
2835         'url': 'https://www.youtubekids.com/channel/UCyu8StPfZWapR6rfW_JgqcA',
2836         'only_matching': True,
2837     }]
2838
2839     @classmethod
2840     def suitable(cls, url):
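             # Defer to the more specific playlists and live extractors when they match.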
2841         return (False if YoutubePlaylistsIE.suitable(url) or YoutubeLiveIE.suitable(url)
2842                 else super(YoutubeChannelIE, cls).suitable(url))
2843
2844     def _build_template_url(self, url, channel_id):
2845         return self._TEMPLATE_URL % channel_id
2846
2847     def _real_extract(self, url):
2848         channel_id = self._match_id(url)
2849
2850         url = self._build_template_url(url, channel_id)
2851
2852         # Channel by page listing is restricted to 35 pages of 30 items, i.e. 1050 videos total (see #5778).
2853         # Work around this by extracting as a playlist if the channel playlist URL can be obtained,
2854         # otherwise fall back on channel by page extraction.
2855         channel_page = self._download_webpage(
2856             url + '?view=57', channel_id,
2857             'Downloading channel page', fatal=False)
2858         if channel_page is False:
2859             channel_playlist_id = False
2860         else:
2861             channel_playlist_id = self._html_search_meta(
2862                 'channelId', channel_page, 'channel id', default=None)
2863             if not channel_playlist_id:
2864                 channel_url = self._html_search_meta(
2865                     ('al:ios:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad'),
2866                     channel_page, 'channel url', default=None)
2867                 if channel_url:
2868                     channel_playlist_id = self._search_regex(
2869                         r'vnd\.youtube://user/([0-9A-Za-z_-]+)',
2870                         channel_url, 'channel id', default=None)
2871         if channel_playlist_id and channel_playlist_id.startswith('UC'):
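                 # The channel's uploads playlist id is the channel id with its
                 # 'UC' prefix replaced by 'UU'.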
2872             playlist_id = 'UU' + channel_playlist_id[2:]
2873             return self.url_result(
2874                 compat_urlparse.urljoin(url, '/playlist?list=%s' % playlist_id), 'YoutubePlaylist')
2875
2876         channel_page = self._download_webpage(url, channel_id, 'Downloading page #1')
2877         autogenerated = re.search(r'''(?x)
2878                 class="[^"]*?(?:
2879                     channel-header-autogenerated-label|
2880                     yt-channel-title-autogenerated
2881                 )[^"]*"''', channel_page) is not None
2882
2883         if autogenerated:
2884             # The videos are contained in a single page;
2885             # the AJAX pages can't be used because they are empty
2886             entries = [
2887                 self.url_result(
2888                     video_id, 'Youtube', video_id=video_id,
2889                     video_title=video_title)
2890                 for video_id, video_title in self.extract_videos_from_page(channel_page)]
2891             return self.playlist_result(entries, channel_id)
2892
2893         try:
2894             next(self._entries(channel_page, channel_id))
2895         except StopIteration:
2896             alert_message = self._html_search_regex(
2897                 r'(?s)<div[^>]+class=(["\']).*?\byt-alert-message\b.*?\1[^>]*>(?P<alert>[^<]+)</div>',
2898                 channel_page, 'alert', default=None, group='alert')
2899             if alert_message:
2900                 raise ExtractorError('Youtube said: %s' % alert_message, expected=True)
2901
2902         return self.playlist_result(self._entries(channel_page, channel_id), channel_id)
2903
2904
2905 class YoutubeUserIE(YoutubeChannelIE):
2906     IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
2907     _VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:(?P<user>user|c)/)?(?!(?:attribution_link|watch|results|shared)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
2908     _TEMPLATE_URL = 'https://www.youtube.com/%s/%s/videos'
2909     IE_NAME = 'youtube:user'
2910
2911     _TESTS = [{
2912         'url': 'https://www.youtube.com/user/TheLinuxFoundation',
2913         'playlist_mincount': 320,
2914         'info_dict': {
2915             'id': 'UUfX55Sx5hEFjoC3cNs6mCUQ',
2916             'title': 'Uploads from The Linux Foundation',
2917             'uploader': 'The Linux Foundation',
2918             'uploader_id': 'TheLinuxFoundation',
2919         }
2920     }, {
2921         # Only available via https://www.youtube.com/c/12minuteathlete/videos
2922         # but not https://www.youtube.com/user/12minuteathlete/videos
2923         'url': 'https://www.youtube.com/c/12minuteathlete/videos',
2924         'playlist_mincount': 249,
2925         'info_dict': {
2926             'id': 'UUVjM-zV6_opMDx7WYxnjZiQ',
2927             'title': 'Uploads from 12 Minute Athlete',
2928             'uploader': '12 Minute Athlete',
2929             'uploader_id': 'the12minuteathlete',
2930         }
2931     }, {
2932         'url': 'ytuser:phihag',
2933         'only_matching': True,
2934     }, {
2935         'url': 'https://www.youtube.com/c/gametrailers',
2936         'only_matching': True,
2937     }, {
2938         'url': 'https://www.youtube.com/gametrailers',
2939         'only_matching': True,
2940     }, {
2941         # This channel is not available, geo restricted to JP
2942         'url': 'https://www.youtube.com/user/kananishinoSMEJ/videos',
2943         'only_matching': True,
2944     }]
2945
2946     @classmethod
2947     def suitable(cls, url):
2948         # Don't return True if the url can be extracted with another youtube
2949         # extractor; the regex is too permissive and it would match.
2950         other_yt_ies = iter(klass for (name, klass) in globals().items() if name.startswith('Youtube') and name.endswith('IE') and klass is not cls)
2951         if any(ie.suitable(url) for ie in other_yt_ies):
2952             return False
2953         else:
2954             return super(YoutubeUserIE, cls).suitable(url)
2955
2956     def _build_template_url(self, url, channel_id):
2957         mobj = re.match(self._VALID_URL, url)
2958         return self._TEMPLATE_URL % (mobj.group('user') or 'user', mobj.group('id'))
2959
2960
2961 class YoutubeLiveIE(YoutubeBaseInfoExtractor):
2962     IE_DESC = 'YouTube.com live streams'
2963     _VALID_URL = r'(?P<base_url>https?://(?:\w+\.)?youtube\.com/(?:(?:user|channel|c)/)?(?P<id>[^/]+))/live'
2964     IE_NAME = 'youtube:live'
2965
2966     _TESTS = [{
2967         'url': 'https://www.youtube.com/user/TheYoungTurks/live',
2968         'info_dict': {
2969             'id': 'a48o2S1cPoo',
2970             'ext': 'mp4',
2971             'title': 'The Young Turks - Live Main Show',
2972             'uploader': 'The Young Turks',
2973             'uploader_id': 'TheYoungTurks',
2974             'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
2975             'upload_date': '20150715',
2976             'license': 'Standard YouTube License',
2977             'description': 'md5:438179573adcdff3c97ebb1ee632b891',
2978             'categories': ['News & Politics'],
2979             'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
2980             'like_count': int,
2981             'dislike_count': int,
2982         },
2983         'params': {
2984             'skip_download': True,
2985         },
2986     }, {
2987         'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
2988         'only_matching': True,
2989     }, {
2990         'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
2991         'only_matching': True,
2992     }, {
2993         'url': 'https://www.youtube.com/TheYoungTurks/live',
2994         'only_matching': True,
2995     }]
2996
2997     def _real_extract(self, url):
2998         mobj = re.match(self._VALID_URL, url)
2999         channel_id = mobj.group('id')
3000         base_url = mobj.group('base_url')
3001         webpage = self._download_webpage(url, channel_id, fatal=False)
3002         if webpage:
3003             page_type = self._og_search_property(
3004                 'type', webpage, 'page type', default='')
3005             video_id = self._html_search_meta(
3006                 'videoId', webpage, 'video id', default=None)
3007             if page_type.startswith('video') and video_id and re.match(
3008                     r'^[0-9A-Za-z_-]{11}$', video_id):
3009                 return self.url_result(video_id, YoutubeIE.ie_key())
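             # No live video id could be extracted, fall back to the channel/user page.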
3010         return self.url_result(base_url)
3011
3012
3013 class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
3014     IE_DESC = 'YouTube.com user/channel playlists'
3015     _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/(?:user|channel)/(?P<id>[^/]+)/playlists'
3016     IE_NAME = 'youtube:playlists'
3017
3018     _TESTS = [{
3019         'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
3020         'playlist_mincount': 4,
3021         'info_dict': {
3022             'id': 'ThirstForScience',
3023             'title': 'ThirstForScience',
3024         },
3025     }, {
3026         # with "Load more" button
3027         'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
3028         'playlist_mincount': 70,
3029         'info_dict': {
3030             'id': 'igorkle1',
3031             'title': 'Игорь Клейнер',
3032         },
3033     }, {
3034         'url': 'https://www.youtube.com/channel/UCiU1dHvZObB2iP6xkJ__Icw/playlists',
3035         'playlist_mincount': 17,
3036         'info_dict': {
3037             'id': 'UCiU1dHvZObB2iP6xkJ__Icw',
3038             'title': 'Chem Player',
3039         },
3040         'skip': 'Blocked',
3041     }]
3042
3043
3044 class YoutubeSearchBaseInfoExtractor(YoutubePlaylistBaseInfoExtractor):
3045     _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})(?:[^"]*"[^>]+\btitle="(?P<title>[^"]+))?'
3046
3047
3048 class YoutubeSearchIE(SearchInfoExtractor, YoutubeSearchBaseInfoExtractor):
3049     IE_DESC = 'YouTube.com searches'
3050     # There doesn't appear to be a real limit; for example, if you search for
3051     # 'python' you get more than 8,000,000 results
3052     _MAX_RESULTS = float('inf')
3053     IE_NAME = 'youtube:search'
3054     _SEARCH_KEY = 'ytsearch'
3055     _EXTRA_QUERY_ARGS = {}
3056     _TESTS = []
3057
3058     def _get_n_results(self, query, n):
3059         """Get a specified number of results for a query"""
3060
3061         videos = []
3062         limit = n
3063
3064         url_query = {
3065             'search_query': query.encode('utf-8'),
3066         }
3067         url_query.update(self._EXTRA_QUERY_ARGS)
3068         result_url = 'https://www.youtube.com/results?' + compat_urllib_parse_urlencode(url_query)
3069
3070         for pagenum in itertools.count(1):
3071             data = self._download_json(
3072                 result_url, video_id='query "%s"' % query,
3073                 note='Downloading page %s' % pagenum,
3074                 errnote='Unable to download API page',
3075                 query={'spf': 'navigate'})
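                 # With spf=navigate the response is JSON; the rendered results HTML
                 # sits in the second array element under body.content.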
3076             html_content = data[1]['body']['content']
3077
3078             if 'class="search-message' in html_content:
3079                 raise ExtractorError(
3080                     '[youtube] No video results', expected=True)
3081
3082             new_videos = list(self._process_page(html_content))
3083             videos += new_videos
3084             if not new_videos or len(videos) > limit:
3085                 break
3086             next_link = self._html_search_regex(
3087                 r'href="(/results\?[^"]*\bsp=[^"]+)"[^>]*>\s*<span[^>]+class="[^"]*\byt-uix-button-content\b[^"]*"[^>]*>Next',
3088                 html_content, 'next link', default=None)
3089             if next_link is None:
3090                 break
3091             result_url = compat_urlparse.urljoin('https://www.youtube.com/', next_link)
3092
3093         if len(videos) > n:
3094             videos = videos[:n]
3095         return self.playlist_result(videos, query)
3096
3097
3098 class YoutubeSearchDateIE(YoutubeSearchIE):
3099     IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
3100     _SEARCH_KEY = 'ytsearchdate'
3101     IE_DESC = 'YouTube.com searches, newest videos first'
3102     _EXTRA_QUERY_ARGS = {'search_sort': 'video_date_uploaded'}
3103
3104
3105 class YoutubeSearchURLIE(YoutubeSearchBaseInfoExtractor):
3106     IE_DESC = 'YouTube.com search URLs'
3107     IE_NAME = 'youtube:search_url'
3108     _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)'
3109     _TESTS = [{
3110         'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
3111         'playlist_mincount': 5,
3112         'info_dict': {
3113             'title': 'youtube-dl test video',
3114         }
3115     }, {
3116         'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
3117         'only_matching': True,
3118     }]
3119
3120     def _real_extract(self, url):
3121         mobj = re.match(self._VALID_URL, url)
3122         query = compat_urllib_parse_unquote_plus(mobj.group('query'))
3123         webpage = self._download_webpage(url, query)
3124         return self.playlist_result(self._process_page(webpage), playlist_title=query)
3125
3126
3127 class YoutubeShowIE(YoutubePlaylistsBaseInfoExtractor):
3128     IE_DESC = 'YouTube.com (multi-season) shows'
3129     _VALID_URL = r'https?://(?:www\.)?youtube\.com/show/(?P<id>[^?#]*)'
3130     IE_NAME = 'youtube:show'
3131     _TESTS = [{
3132         'url': 'https://www.youtube.com/show/airdisasters',
3133         'playlist_mincount': 5,
3134         'info_dict': {
3135             'id': 'airdisasters',
3136             'title': 'Air Disasters',
3137         }
3138     }]
3139
3140     def _real_extract(self, url):
3141         playlist_id = self._match_id(url)
3142         return super(YoutubeShowIE, self)._real_extract(
3143             'https://www.youtube.com/show/%s/playlists' % playlist_id)
3144
3145
3146 class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
3147     """
3148     Base class for feed extractors
3149     Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
3150     """
3151     _LOGIN_REQUIRED = True
3152
3153     @property
3154     def IE_NAME(self):
3155         return 'youtube:%s' % self._FEED_NAME
3156
3157     def _real_initialize(self):
3158         self._login()
3159
3160     def _entries(self, page):
3161         # The extraction process is the same as for playlists, but the regex
3162         # for the video ids doesn't contain an index
3163         ids = []
3164         more_widget_html = content_html = page
3165         for page_num in itertools.count(1):
3166             matches = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)
3167
3168             # The 'recommended' feed has an infinite 'load more' and each new portion
3169             # serves the same videos in a (sometimes) slightly different order, so we
3170             # check for uniqueness and break when a portion has no new videos
3171             new_ids = list(filter(lambda video_id: video_id not in ids, orderedSet(matches)))
3172             if not new_ids:
3173                 break
3174
3175             ids.extend(new_ids)
3176
3177             for entry in self._ids_to_results(new_ids):
3178                 yield entry
3179
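                 # Follow the AJAX 'load more' link until the widget disappears.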
3180             mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
3181             if not mobj:
3182                 break
3183
3184             more = self._download_json(
3185                 'https://youtube.com/%s' % mobj.group('more'), self._PLAYLIST_TITLE,
3186                 'Downloading page #%s' % page_num,
3187                 transform_source=uppercase_escape)
3188             content_html = more['content_html']
3189             more_widget_html = more['load_more_widget_html']
3190
3191     def _real_extract(self, url):
3192         page = self._download_webpage(
3193             'https://www.youtube.com/feed/%s' % self._FEED_NAME,
3194             self._PLAYLIST_TITLE)
3195         return self.playlist_result(
3196             self._entries(page), playlist_title=self._PLAYLIST_TITLE)
3197
3198
3199 class YoutubeWatchLaterIE(YoutubePlaylistIE):
3200     IE_NAME = 'youtube:watchlater'
3201     IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
3202     _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:feed/watch_later|(?:playlist|watch)\?(?:.+&)?list=WL)|:ytwatchlater'
3203
3204     _TESTS = [{
3205         'url': 'https://www.youtube.com/playlist?list=WL',
3206         'only_matching': True,
3207     }, {
3208         'url': 'https://www.youtube.com/watch?v=bCNU9TrbiRk&index=1&list=WL',
3209         'only_matching': True,
3210     }]
3211
3212     def _real_extract(self, url):
3213         _, video = self._check_download_just_video(url, 'WL')
3214         if video:
3215             return video
3216         _, playlist = self._extract_playlist('WL')
3217         return playlist
3218
3219
3220 class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
3221     IE_NAME = 'youtube:favorites'
3222     IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
3223     _VALID_URL = r'https?://(?:www\.)?youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
3224     _LOGIN_REQUIRED = True
3225
3226     def _real_extract(self, url):
3227         webpage = self._download_webpage('https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
3228         playlist_id = self._search_regex(r'list=(.+?)["&]', webpage, 'favourites playlist id')
3229         return self.url_result(playlist_id, 'YoutubePlaylist')
3230
3231
3232 class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
3233     IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
3234     _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/recommended|:ytrec(?:ommended)?'
3235     _FEED_NAME = 'recommended'
3236     _PLAYLIST_TITLE = 'Youtube Recommended videos'
3237
3238
3239 class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
3240     IE_DESC = 'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
3241     _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
3242     _FEED_NAME = 'subscriptions'
3243     _PLAYLIST_TITLE = 'Youtube Subscriptions'
3244
3245
3246 class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
3247     IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
3248     _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/history|:ythistory'
3249     _FEED_NAME = 'history'
3250     _PLAYLIST_TITLE = 'Youtube History'
3251
3252
3253 class YoutubeTruncatedURLIE(InfoExtractor):
3254     IE_NAME = 'youtube:truncated_url'
3255     IE_DESC = False  # Do not list
3256     _VALID_URL = r'''(?x)
3257         (?:https?://)?
3258         (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
3259         (?:watch\?(?:
3260             feature=[a-z_]+|
3261             annotation_id=annotation_[^&]+|
3262             x-yt-cl=[0-9]+|
3263             hl=[^&]*|
3264             t=[0-9]+
3265         )?
3266         |
3267             attribution_link\?a=[^&]+
3268         )
3269         $
3270     '''
3271
3272     _TESTS = [{
3273         'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
3274         'only_matching': True,
3275     }, {
3276         'url': 'https://www.youtube.com/watch?',
3277         'only_matching': True,
3278     }, {
3279         'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
3280         'only_matching': True,
3281     }, {
3282         'url': 'https://www.youtube.com/watch?feature=foo',
3283         'only_matching': True,
3284     }, {
3285         'url': 'https://www.youtube.com/watch?hl=en-GB',
3286         'only_matching': True,
3287     }, {
3288         'url': 'https://www.youtube.com/watch?t=2372',
3289         'only_matching': True,
3290     }]
3291
3292     def _real_extract(self, url):
3293         raise ExtractorError(
3294             'Did you forget to quote the URL? Remember that & is a meta '
3295             'character in most shells, so you want to put the URL in quotes, '
3296             'like  youtube-dl '
3297             '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
3298             ' or simply  youtube-dl BaW_jenozKc  .',
3299             expected=True)
3300
3301
3302 class YoutubeTruncatedIDIE(InfoExtractor):
3303     IE_NAME = 'youtube:truncated_id'
3304     IE_DESC = False  # Do not list
3305     _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'
3306
3307     _TESTS = [{
3308         'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
3309         'only_matching': True,
3310     }]
3311
3312     def _real_extract(self, url):
3313         video_id = self._match_id(url)
3314         raise ExtractorError(
3315             'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url),
3316             expected=True)