[youtube] Extract additional metadata from the video description of YouTube Music videos
youtube-dl / youtube_dl/extractor/youtube.py
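
The change named in the title pulls artist, track, album and release information out of the auto-generated descriptions that YouTube Music attaches to its topic-channel uploads; the new test cases near the end of the _TESTS list below exercise exactly those fields. As a rough illustration rather than the patch itself, such a description could be parsed along the following lines. The helper name parse_music_description and the exact patterns are assumptions, though the 'Artist:', 'Released on: YYYY-MM-DD' and '℗ <year>' labels do occur in those descriptions.

import re


def parse_music_description(description):
    """Illustrative sketch only: pull music metadata out of an auto-generated
    YouTube Music description (helper name and patterns are assumptions)."""
    meta = {}
    if not description:
        return meta
    # 'Artist: benny blanco, Halsey, Khalid' -- multiple artists stay comma-separated
    mobj = re.search(r'(?m)^Artist\s*:\s*(.+)$', description)
    if mobj:
        meta['artist'] = mobj.group(1).strip()
    # 'Released on: 2018-07-13' -> release_date in YYYYMMDD form
    mobj = re.search(r'Released on\s*:\s*(\d{4})-(\d{2})-(\d{2})', description)
    if mobj:
        meta['release_date'] = ''.join(mobj.groups())
    # '℗ 2018 <label>' -> release_year; otherwise derive it from release_date
    mobj = re.search(r'℗\s*(\d{4})', description)
    if mobj:
        meta['release_year'] = int(mobj.group(1))
    elif meta.get('release_date'):
        meta['release_year'] = int(meta['release_date'][:4])
    return meta

The new tests for k0jLE7tTwjY, 74qn0eJSjpA and -hcAI0g-f5M below assert these fields plus album and track, which come from other lines of the same description and are omitted from the sketch.
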
1 # coding: utf-8
2
3 from __future__ import unicode_literals
4
5
6 import itertools
7 import json
8 import os.path
9 import random
10 import re
11 import time
12 import traceback
13
14 from .common import InfoExtractor, SearchInfoExtractor
15 from ..jsinterp import JSInterpreter
16 from ..swfinterp import SWFInterpreter
17 from ..compat import (
18     compat_chr,
19     compat_kwargs,
20     compat_parse_qs,
21     compat_urllib_parse_unquote,
22     compat_urllib_parse_unquote_plus,
23     compat_urllib_parse_urlencode,
24     compat_urllib_parse_urlparse,
25     compat_urlparse,
26     compat_str,
27 )
28 from ..utils import (
29     clean_html,
30     error_to_compat_str,
31     ExtractorError,
32     float_or_none,
33     get_element_by_attribute,
34     get_element_by_id,
35     int_or_none,
36     mimetype2ext,
37     orderedSet,
38     parse_codecs,
39     parse_duration,
40     qualities,
41     remove_quotes,
42     remove_start,
43     smuggle_url,
44     str_or_none,
45     str_to_int,
46     try_get,
47     unescapeHTML,
48     unified_strdate,
49     unsmuggle_url,
50     uppercase_escape,
51     url_or_none,
52     urlencode_postdata,
53 )
54
55
56 class YoutubeBaseInfoExtractor(InfoExtractor):
57     """Provide base functions for Youtube extractors"""
58     _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
59     _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'
60
61     _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
62     _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
63     _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'
64
65     _NETRC_MACHINE = 'youtube'
66     # If True, an error will be raised if no login info is provided
67     _LOGIN_REQUIRED = False
68
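    # Known playlist ID prefixes include PL (regular playlists), LL (liked videos),
    # UU (channel uploads), FL (favorites), RD (mixes/radio) and OLAK5uy_
    # (auto-generated album playlists)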
69     _PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL|OLAK5uy_)[0-9A-Za-z-_]{10,}'
70
71     def _set_language(self):
72         self._set_cookie(
73             '.youtube.com', 'PREF', 'f1=50000000&hl=en',
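            # hl=en forces the English-language interface so that text-based
            # extraction does not depend on the account/region locale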
74             # YouTube sets the expire time to about two months
75             expire_time=time.time() + 2 * 30 * 24 * 3600)
76
77     def _ids_to_results(self, ids):
78         return [
79             self.url_result(vid_id, 'Youtube', video_id=vid_id)
80             for vid_id in ids]
81
82     def _login(self):
83         """
84         Attempt to log in to YouTube.
85         True is returned if successful or skipped.
86         False is returned if login failed.
87
88         If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
89         """
90         username, password = self._get_login_info()
91         # No authentication to be performed
92         if username is None:
93             if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
94                 raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
95             return True
96
97         login_page = self._download_webpage(
98             self._LOGIN_URL, None,
99             note='Downloading login page',
100             errnote='unable to fetch login page', fatal=False)
101         if login_page is False:
102             return
103
104         login_form = self._hidden_inputs(login_page)
105
106         def req(url, f_req, note, errnote):
107             data = login_form.copy()
108             data.update({
109                 'pstMsg': 1,
110                 'checkConnection': 'youtube',
111                 'checkedDomains': 'youtube',
112                 'hl': 'en',
113                 'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
114                 'f.req': json.dumps(f_req),
115                 'flowName': 'GlifWebSignIn',
116                 'flowEntry': 'ServiceLogin',
117             })
118             return self._download_json(
119                 url, None, note=note, errnote=errnote,
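                # The response is prefixed with non-JSON garbage (e.g. ")]}'"),
                # so strip everything before the first '[' to make it parseable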
120                 transform_source=lambda s: re.sub(r'^[^[]*', '', s),
121                 fatal=False,
122                 data=urlencode_postdata(data), headers={
123                     'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
124                     'Google-Accounts-XSRF': 1,
125                 })
126
127         def warn(message):
128             self._downloader.report_warning(message)
129
130         lookup_req = [
131             username,
132             None, [], None, 'US', None, None, 2, False, True,
133             [
134                 None, None,
135                 [2, 1, None, 1,
136                  'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
137                  None, [], 4],
138                 1, [None, None, []], None, None, None, True
139             ],
140             username,
141         ]
142
143         lookup_results = req(
144             self._LOOKUP_URL, lookup_req,
145             'Looking up account info', 'Unable to look up account info')
146
147         if lookup_results is False:
148             return False
149
150         user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
151         if not user_hash:
152             warn('Unable to extract user hash')
153             return False
154
155         challenge_req = [
156             user_hash,
157             None, 1, None, [1, None, None, None, [password, None, True]],
158             [
159                 None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
160                 1, [None, None, []], None, None, None, True
161             ]]
162
163         challenge_results = req(
164             self._CHALLENGE_URL, challenge_req,
165             'Logging in', 'Unable to log in')
166
167         if challenge_results is False:
168             return
169
170         login_res = try_get(challenge_results, lambda x: x[0][5], list)
171         if login_res:
172             login_msg = try_get(login_res, lambda x: x[5], compat_str)
173             warn(
174                 'Unable to login: %s' % (
175                     'Invalid password' if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg))
176             return False
177
178         res = try_get(challenge_results, lambda x: x[0][-1], list)
179         if not res:
180             warn('Unable to extract result entry')
181             return False
182
183         login_challenge = try_get(res, lambda x: x[0][0], list)
184         if login_challenge:
185             challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
186             if challenge_str == 'TWO_STEP_VERIFICATION':
187                 # SEND_SUCCESS - TFA code has been successfully sent to phone
188                 # QUOTA_EXCEEDED - reached the limit of TFA codes
189                 status = try_get(login_challenge, lambda x: x[5], compat_str)
190                 if status == 'QUOTA_EXCEEDED':
191                     warn('Exceeded the limit of TFA codes, try later')
192                     return False
193
194                 tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
195                 if not tl:
196                     warn('Unable to extract TL')
197                     return False
198
199                 tfa_code = self._get_tfa_info('2-step verification code')
200
201                 if not tfa_code:
202                     warn(
203                         'Two-factor authentication required. Provide it either interactively or with --twofactor <code> '
204                         '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
205                     return False
206
207                 tfa_code = remove_start(tfa_code, 'G-')
208
209                 tfa_req = [
210                     user_hash, None, 2, None,
211                     [
212                         9, None, None, None, None, None, None, None,
213                         [None, tfa_code, True, 2]
214                     ]]
215
216                 tfa_results = req(
217                     self._TFA_URL.format(tl), tfa_req,
218                     'Submitting TFA code', 'Unable to submit TFA code')
219
220                 if tfa_results is False:
221                     return False
222
223                 tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
224                 if tfa_res:
225                     tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
226                     warn(
227                         'Unable to finish TFA: %s' % (
228                             'Invalid TFA code' if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg))
229                     return False
230
231                 check_cookie_url = try_get(
232                     tfa_results, lambda x: x[0][-1][2], compat_str)
233             else:
234                 CHALLENGES = {
235                     'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
236                     'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
237                     'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
238                 }
239                 challenge = CHALLENGES.get(
240                     challenge_str,
241                     '%s returned error %s.' % (self.IE_NAME, challenge_str))
242                 warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
243                 return False
244         else:
245             check_cookie_url = try_get(res, lambda x: x[2], compat_str)
246
247         if not check_cookie_url:
248             warn('Unable to extract CheckCookie URL')
249             return False
250
251         check_cookie_results = self._download_webpage(
252             check_cookie_url, None, 'Checking cookie', fatal=False)
253
254         if check_cookie_results is False:
255             return False
256
257         if 'https://myaccount.google.com/' not in check_cookie_results:
258             warn('Unable to log in')
259             return False
260
261         return True
262
263     def _download_webpage_handle(self, *args, **kwargs):
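        # Force the legacy (non-Polymer) page layout; the markup parsed by
        # these extractors is only present in the old pages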
264         query = kwargs.get('query', {}).copy()
265         query['disable_polymer'] = 'true'
266         kwargs['query'] = query
267         return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
268             *args, **compat_kwargs(kwargs))
269
270     def _real_initialize(self):
271         if self._downloader is None:
272             return
273         self._set_language()
274         if not self._login():
275             return
276
277
278 class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
279     # Extract entries from page with "Load more" button
280     def _entries(self, page, playlist_id):
281         more_widget_html = content_html = page
282         for page_num in itertools.count(1):
283             for entry in self._process_page(content_html):
284                 yield entry
285
286             mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
287             if not mobj:
288                 break
289
290             more = self._download_json(
291                 'https://youtube.com/%s' % mobj.group('more'), playlist_id,
292                 'Downloading page #%s' % page_num,
293                 transform_source=uppercase_escape)
294             content_html = more['content_html']
295             if not content_html.strip():
296                 # Some webpages show a "Load more" button but they don't
297                 # have more videos
298                 break
299             more_widget_html = more['load_more_widget_html']
300
301
302 class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
303     def _process_page(self, content):
304         for video_id, video_title in self.extract_videos_from_page(content):
305             yield self.url_result(video_id, 'Youtube', video_id, video_title)
306
307     def extract_videos_from_page(self, page):
308         ids_in_page = []
309         titles_in_page = []
310         for mobj in re.finditer(self._VIDEO_RE, page):
311             # The link with index 0 is not the first video of the playlist (not sure if this is still the case)
312             if 'index' in mobj.groupdict() and mobj.group('id') == '0':
313                 continue
314             video_id = mobj.group('id')
315             video_title = unescapeHTML(mobj.group('title'))
316             if video_title:
317                 video_title = video_title.strip()
318             try:
319                 idx = ids_in_page.index(video_id)
320                 if video_title and not titles_in_page[idx]:
321                     titles_in_page[idx] = video_title
322             except ValueError:
323                 ids_in_page.append(video_id)
324                 titles_in_page.append(video_title)
325         return zip(ids_in_page, titles_in_page)
326
327
328 class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
329     def _process_page(self, content):
330         for playlist_id in orderedSet(re.findall(
331                 r'<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*><a[^>]+href="/?playlist\?list=([0-9A-Za-z-_]{10,})"',
332                 content)):
333             yield self.url_result(
334                 'https://www.youtube.com/playlist?list=%s' % playlist_id, 'YoutubePlaylist')
335
336     def _real_extract(self, url):
337         playlist_id = self._match_id(url)
338         webpage = self._download_webpage(url, playlist_id)
339         title = self._og_search_title(webpage, fatal=False)
340         return self.playlist_result(self._entries(webpage, playlist_id), playlist_id, title)
341
342
343 class YoutubeIE(YoutubeBaseInfoExtractor):
344     IE_DESC = 'YouTube.com'
345     _VALID_URL = r"""(?x)^
346                      (
347                          (?:https?://|//)                                    # http(s):// or protocol-independent URL
348                          (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|
349                             (?:www\.)?deturl\.com/www\.youtube\.com/|
350                             (?:www\.)?pwnyoutube\.com/|
351                             (?:www\.)?hooktube\.com/|
352                             (?:www\.)?yourepeat\.com/|
353                             tube\.majestyc\.net/|
354                             (?:(?:www|dev)\.)?invidio\.us/|
355                             (?:www\.)?invidiou\.sh/|
356                             (?:www\.)?invidious\.snopyta\.org/|
357                             (?:www\.)?invidious\.kabi\.tk/|
358                             (?:www\.)?vid\.wxzm\.sx/|
359                             youtube\.googleapis\.com/)                        # the various hostnames, with wildcard subdomains
360                          (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
361                          (?:                                                  # the various things that can precede the ID:
362                              (?:(?:v|embed|e)/(?!videoseries))                # v/ or embed/ or e/
363                              |(?:                                             # or the v= param in all its forms
364                                  (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)?  # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
365                                  (?:\?|\#!?)                                  # the params delimiter ? or # or #!
366                                  (?:.*?[&;])??                                # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
367                                  v=
368                              )
369                          ))
370                          |(?:
371                             youtu\.be|                                        # just youtu.be/xxxx
372                             vid\.plus|                                        # or vid.plus/xxxx
373                             zwearz\.com/watch|                                # or zwearz.com/watch/xxxx
374                          )/
375                          |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
376                          )
377                      )?                                                       # all until now is optional -> you can pass the naked ID
378                      ([0-9A-Za-z_-]{11})                                      # here is it! the YouTube video ID
379                      (?!.*?\blist=
380                         (?:
381                             %(playlist_id)s|                                  # combined list/video URLs are handled by the playlist IE
382                             WL                                                # WL are handled by the watch later IE
383                         )
384                      )
385                      (?(1).+)?                                                # if we found the ID, everything can follow
386                      $""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
387     _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
388     _formats = {
389         '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
390         '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
391         '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
392         '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
393         '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
394         '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
395         '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
396         '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
397         # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
398         '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
399         '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
400         '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
401         '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
402         '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
403         '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
404         '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
405         '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
406         '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
407
408
409         # 3D videos
410         '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
411         '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
412         '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
413         '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
414         '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
415         '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
416         '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
417
418         # Apple HTTP Live Streaming
419         '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
420         '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
421         '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
422         '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
423         '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
424         '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
425         '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
426         '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
427
428         # DASH mp4 video
429         '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
430         '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
431         '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
432         '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
433         '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
434         '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'},  # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
435         '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
436         '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
437         '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
438         '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
439         '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
440         '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
441
442         # Dash mp4 audio
443         '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
444         '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
445         '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
446         '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
447         '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
448         '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
449         '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
450
451         # Dash webm
452         '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
453         '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
454         '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
455         '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
456         '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
457         '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
458         '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
459         '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
460         '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
461         '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
462         '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
463         '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
464         '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
465         '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
466         '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
467         # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
468         '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
469         '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
470         '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
471         '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
472         '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
473         '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
474
475         # Dash webm audio
476         '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
477         '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
478
479         # Dash webm audio with opus inside
480         '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
481         '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
482         '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
483
484         # RTMP (unnamed)
485         '_rtmp': {'protocol': 'rtmp'},
486     }
487     _SUBTITLE_FORMATS = ('srv1', 'srv2', 'srv3', 'ttml', 'vtt')
488
489     _GEO_BYPASS = False
490
491     IE_NAME = 'youtube'
492     _TESTS = [
493         {
494             'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
495             'info_dict': {
496                 'id': 'BaW_jenozKc',
497                 'ext': 'mp4',
498                 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
499                 'uploader': 'Philipp Hagemeister',
500                 'uploader_id': 'phihag',
501                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
502                 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
503                 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
504                 'upload_date': '20121002',
505                 'description': 'test chars:  "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
506                 'categories': ['Science & Technology'],
507                 'tags': ['youtube-dl'],
508                 'duration': 10,
509                 'view_count': int,
510                 'like_count': int,
511                 'dislike_count': int,
512                 'start_time': 1,
513                 'end_time': 9,
514             }
515         },
516         {
517             'url': 'https://www.youtube.com/watch?v=UxxajLWwzqY',
518             'note': 'Test generic use_cipher_signature video (#897)',
519             'info_dict': {
520                 'id': 'UxxajLWwzqY',
521                 'ext': 'mp4',
522                 'upload_date': '20120506',
523                 'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
524                 'alt_title': 'I Love It (feat. Charli XCX)',
525                 'description': 'md5:f3ceb5ef83a08d95b9d146f973157cc8',
526                 'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
527                          'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
528                          'iconic ep', 'iconic', 'love', 'it'],
529                 'duration': 180,
530                 'uploader': 'Icona Pop',
531                 'uploader_id': 'IconaPop',
532                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IconaPop',
533                 'creator': 'Icona Pop',
534                 'track': 'I Love It (feat. Charli XCX)',
535                 'artist': 'Icona Pop',
536             }
537         },
538         {
539             'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ',
540             'note': 'Test VEVO video with age protection (#956)',
541             'info_dict': {
542                 'id': '07FYdnEawAQ',
543                 'ext': 'mp4',
544                 'upload_date': '20130703',
545                 'title': 'Justin Timberlake - Tunnel Vision (Official Music Video) (Explicit)',
546                 'alt_title': 'Tunnel Vision',
547                 'description': 'md5:07dab3356cde4199048e4c7cd93471e1',
548                 'duration': 419,
549                 'uploader': 'justintimberlakeVEVO',
550                 'uploader_id': 'justintimberlakeVEVO',
551                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/justintimberlakeVEVO',
552                 'creator': 'Justin Timberlake',
553                 'track': 'Tunnel Vision',
554                 'artist': 'Justin Timberlake',
555                 'age_limit': 18,
556             }
557         },
558         {
559             'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
560             'note': 'Embed-only video (#1746)',
561             'info_dict': {
562                 'id': 'yZIXLfi8CZQ',
563                 'ext': 'mp4',
564                 'upload_date': '20120608',
565                 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
566                 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
567                 'uploader': 'SET India',
568                 'uploader_id': 'setindia',
569                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
570                 'age_limit': 18,
571             }
572         },
573         {
574             'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=UxxajLWwzqY',
575             'note': 'Use the first video ID in the URL',
576             'info_dict': {
577                 'id': 'BaW_jenozKc',
578                 'ext': 'mp4',
579                 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
580                 'uploader': 'Philipp Hagemeister',
581                 'uploader_id': 'phihag',
582                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
583                 'upload_date': '20121002',
584                 'description': 'test chars:  "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
585                 'categories': ['Science & Technology'],
586                 'tags': ['youtube-dl'],
587                 'duration': 10,
588                 'view_count': int,
589                 'like_count': int,
590                 'dislike_count': int,
591             },
592             'params': {
593                 'skip_download': True,
594             },
595         },
596         {
597             'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
598             'note': '256k DASH audio (format 141) via DASH manifest',
599             'info_dict': {
600                 'id': 'a9LDPn-MO4I',
601                 'ext': 'm4a',
602                 'upload_date': '20121002',
603                 'uploader_id': '8KVIDEO',
604                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
605                 'description': '',
606                 'uploader': '8KVIDEO',
607                 'title': 'UHDTV TEST 8K VIDEO.mp4'
608             },
609             'params': {
610                 'youtube_include_dash_manifest': True,
611                 'format': '141',
612             },
613             'skip': 'format 141 not served anymore',
614         },
615         # DASH manifest with encrypted signature
616         {
617             'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
618             'info_dict': {
619                 'id': 'IB3lcPjvWLA',
620                 'ext': 'm4a',
621                 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
622                 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
623                 'duration': 244,
624                 'uploader': 'AfrojackVEVO',
625                 'uploader_id': 'AfrojackVEVO',
626                 'upload_date': '20131011',
627             },
628             'params': {
629                 'youtube_include_dash_manifest': True,
630                 'format': '141/bestaudio[ext=m4a]',
631             },
632         },
633         # JS player signature function name containing $
634         {
635             'url': 'https://www.youtube.com/watch?v=nfWlot6h_JM',
636             'info_dict': {
637                 'id': 'nfWlot6h_JM',
638                 'ext': 'm4a',
639                 'title': 'Taylor Swift - Shake It Off',
640                 'description': 'md5:bec2185232c05479482cb5a9b82719bf',
641                 'duration': 242,
642                 'uploader': 'TaylorSwiftVEVO',
643                 'uploader_id': 'TaylorSwiftVEVO',
644                 'upload_date': '20140818',
645                 'creator': 'Taylor Swift',
646             },
647             'params': {
648                 'youtube_include_dash_manifest': True,
649                 'format': '141/bestaudio[ext=m4a]',
650             },
651         },
652         # Controversy video
653         {
654             'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
655             'info_dict': {
656                 'id': 'T4XJQO3qol8',
657                 'ext': 'mp4',
658                 'duration': 219,
659                 'upload_date': '20100909',
660                 'uploader': 'Amazing Atheist',
661                 'uploader_id': 'TheAmazingAtheist',
662                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
663                 'title': 'Burning Everyone\'s Koran',
664                 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
665             }
666         },
667         # Normal age-gate video (No vevo, embed allowed)
668         {
669             'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
670             'info_dict': {
671                 'id': 'HtVdAasjOgU',
672                 'ext': 'mp4',
673                 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
674                 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
675                 'duration': 142,
676                 'uploader': 'The Witcher',
677                 'uploader_id': 'WitcherGame',
678                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
679                 'upload_date': '20140605',
680                 'age_limit': 18,
681             },
682         },
683         # Age-gate video with encrypted signature
684         {
685             'url': 'https://www.youtube.com/watch?v=6kLq3WMV1nU',
686             'info_dict': {
687                 'id': '6kLq3WMV1nU',
688                 'ext': 'mp4',
689                 'title': 'Dedication To My Ex (Miss That) (Lyric Video)',
690                 'description': 'md5:33765bb339e1b47e7e72b5490139bb41',
691                 'duration': 246,
692                 'uploader': 'LloydVEVO',
693                 'uploader_id': 'LloydVEVO',
694                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/LloydVEVO',
695                 'upload_date': '20110629',
696                 'age_limit': 18,
697             },
698         },
699         # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
700         # YouTube Red ad is not captured for creator
701         {
702             'url': '__2ABJjxzNo',
703             'info_dict': {
704                 'id': '__2ABJjxzNo',
705                 'ext': 'mp4',
706                 'duration': 266,
707                 'upload_date': '20100430',
708                 'uploader_id': 'deadmau5',
709                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
710                 'creator': 'deadmau5',
711                 'description': 'md5:12c56784b8032162bb936a5f76d55360',
712                 'uploader': 'deadmau5',
713                 'title': 'Deadmau5 - Some Chords (HD)',
714                 'alt_title': 'Some Chords',
715             },
716             'expected_warnings': [
717                 'DASH manifest missing',
718             ]
719         },
720         # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
721         {
722             'url': 'lqQg6PlCWgI',
723             'info_dict': {
724                 'id': 'lqQg6PlCWgI',
725                 'ext': 'mp4',
726                 'duration': 6085,
727                 'upload_date': '20150827',
728                 'uploader_id': 'olympic',
729                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
730                 'description': 'HO09  - Women -  GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
731                 'uploader': 'Olympic',
732                 'title': 'Hockey - Women -  GER-AUS - London 2012 Olympic Games',
733             },
734             'params': {
735                 'skip_download': 'requires avconv',
736             }
737         },
738         # Non-square pixels
739         {
740             'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
741             'info_dict': {
742                 'id': '_b-2C3KPAM0',
743                 'ext': 'mp4',
744                 'stretched_ratio': 16 / 9.,
745                 'duration': 85,
746                 'upload_date': '20110310',
747                 'uploader_id': 'AllenMeow',
748                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
749                 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
750                 'uploader': '孫ᄋᄅ',
751                 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
752             },
753         },
754         # url_encoded_fmt_stream_map is empty string
755         {
756             'url': 'qEJwOuvDf7I',
757             'info_dict': {
758                 'id': 'qEJwOuvDf7I',
759                 'ext': 'webm',
760                 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
761                 'description': '',
762                 'upload_date': '20150404',
763                 'uploader_id': 'spbelect',
764                 'uploader': 'Наблюдатели Петербурга',
765             },
766             'params': {
767                 'skip_download': 'requires avconv',
768             },
769             'skip': 'This live event has ended.',
770         },
771         # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
772         {
773             'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
774             'info_dict': {
775                 'id': 'FIl7x6_3R5Y',
776                 'ext': 'webm',
777                 'title': 'md5:7b81415841e02ecd4313668cde88737a',
778                 'description': 'md5:116377fd2963b81ec4ce64b542173306',
779                 'duration': 220,
780                 'upload_date': '20150625',
781                 'uploader_id': 'dorappi2000',
782                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
783                 'uploader': 'dorappi2000',
784                 'formats': 'mincount:31',
785             },
786             'skip': 'no longer applicable',
787         },
788         # DASH manifest with segment_list
789         {
790             'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
791             'md5': '8ce563a1d667b599d21064e982ab9e31',
792             'info_dict': {
793                 'id': 'CsmdDsKjzN8',
794                 'ext': 'mp4',
795                 'upload_date': '20150501',  # According to '<meta itemprop="datePublished"', but in other places it's 20150510
796                 'uploader': 'Airtek',
797                 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
798                 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
799                 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
800             },
801             'params': {
802                 'youtube_include_dash_manifest': True,
803                 'format': '135',  # bestvideo
804             },
805             'skip': 'This live event has ended.',
806         },
807         {
808             # Multifeed videos (multiple cameras), URL is for Main Camera
809             'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
810             'info_dict': {
811                 'id': 'jqWvoWXjCVs',
812                 'title': 'teamPGP: Rocket League Noob Stream',
813                 'description': 'md5:dc7872fb300e143831327f1bae3af010',
814             },
815             'playlist': [{
816                 'info_dict': {
817                     'id': 'jqWvoWXjCVs',
818                     'ext': 'mp4',
819                     'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
820                     'description': 'md5:dc7872fb300e143831327f1bae3af010',
821                     'duration': 7335,
822                     'upload_date': '20150721',
823                     'uploader': 'Beer Games Beer',
824                     'uploader_id': 'beergamesbeer',
825                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
826                     'license': 'Standard YouTube License',
827                 },
828             }, {
829                 'info_dict': {
830                     'id': '6h8e8xoXJzg',
831                     'ext': 'mp4',
832                     'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
833                     'description': 'md5:dc7872fb300e143831327f1bae3af010',
834                     'duration': 7337,
835                     'upload_date': '20150721',
836                     'uploader': 'Beer Games Beer',
837                     'uploader_id': 'beergamesbeer',
838                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
839                     'license': 'Standard YouTube License',
840                 },
841             }, {
842                 'info_dict': {
843                     'id': 'PUOgX5z9xZw',
844                     'ext': 'mp4',
845                     'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
846                     'description': 'md5:dc7872fb300e143831327f1bae3af010',
847                     'duration': 7337,
848                     'upload_date': '20150721',
849                     'uploader': 'Beer Games Beer',
850                     'uploader_id': 'beergamesbeer',
851                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
852                     'license': 'Standard YouTube License',
853                 },
854             }, {
855                 'info_dict': {
856                     'id': 'teuwxikvS5k',
857                     'ext': 'mp4',
858                     'title': 'teamPGP: Rocket League Noob Stream (zim)',
859                     'description': 'md5:dc7872fb300e143831327f1bae3af010',
860                     'duration': 7334,
861                     'upload_date': '20150721',
862                     'uploader': 'Beer Games Beer',
863                     'uploader_id': 'beergamesbeer',
864                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
865                     'license': 'Standard YouTube License',
866                 },
867             }],
868             'params': {
869                 'skip_download': True,
870             },
871             'skip': 'This video is not available.',
872         },
873         {
874             # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
875             'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
876             'info_dict': {
877                 'id': 'gVfLd0zydlo',
878                 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
879             },
880             'playlist_count': 2,
881             'skip': 'Not multifeed anymore',
882         },
883         {
884             'url': 'https://vid.plus/FlRa-iH7PGw',
885             'only_matching': True,
886         },
887         {
888             'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
889             'only_matching': True,
890         },
891         {
892             # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
893             # Also tests cut-off URL expansion in video description (see
894             # https://github.com/ytdl-org/youtube-dl/issues/1892,
895             # https://github.com/ytdl-org/youtube-dl/issues/8164)
896             'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
897             'info_dict': {
898                 'id': 'lsguqyKfVQg',
899                 'ext': 'mp4',
900                 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
901                 'alt_title': 'Dark Walk - Position Music',
902                 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
903                 'duration': 133,
904                 'upload_date': '20151119',
905                 'uploader_id': 'IronSoulElf',
906                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
907                 'uploader': 'IronSoulElf',
908                 'creator': 'Todd Haberman,  Daniel Law Heath and Aaron Kaplan',
909                 'track': 'Dark Walk - Position Music',
910                 'artist': 'Todd Haberman,  Daniel Law Heath and Aaron Kaplan',
911             },
912             'params': {
913                 'skip_download': True,
914             },
915         },
916         {
917             # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
918             'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
919             'only_matching': True,
920         },
921         {
922             # Video with yt:stretch=17:0
923             'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
924             'info_dict': {
925                 'id': 'Q39EVAstoRM',
926                 'ext': 'mp4',
927                 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
928                 'description': 'md5:ee18a25c350637c8faff806845bddee9',
929                 'upload_date': '20151107',
930                 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
931                 'uploader': 'CH GAMER DROID',
932             },
933             'params': {
934                 'skip_download': True,
935             },
936             'skip': 'This video does not exist.',
937         },
938         {
939             # Video licensed under Creative Commons
940             'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
941             'info_dict': {
942                 'id': 'M4gD1WSo5mA',
943                 'ext': 'mp4',
944                 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
945                 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
946                 'duration': 721,
947                 'upload_date': '20150127',
948                 'uploader_id': 'BerkmanCenter',
949                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
950                 'uploader': 'The Berkman Klein Center for Internet & Society',
951                 'license': 'Creative Commons Attribution license (reuse allowed)',
952             },
953             'params': {
954                 'skip_download': True,
955             },
956         },
957         {
958             # Channel-like uploader_url
959             'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
960             'info_dict': {
961                 'id': 'eQcmzGIKrzg',
962                 'ext': 'mp4',
963                 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
964                 'description': 'md5:dda0d780d5a6e120758d1711d062a867',
965                 'duration': 4060,
966                 'upload_date': '20151119',
967                 'uploader': 'Bernie Sanders',
968                 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
969                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
970                 'license': 'Creative Commons Attribution license (reuse allowed)',
971             },
972             'params': {
973                 'skip_download': True,
974             },
975         },
976         {
977             'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
978             'only_matching': True,
979         },
980         {
981             # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
982             'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
983             'only_matching': True,
984         },
985         {
986             # Rental video preview
987             'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
988             'info_dict': {
989                 'id': 'uGpuVWrhIzE',
990                 'ext': 'mp4',
991                 'title': 'Piku - Trailer',
992                 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
993                 'upload_date': '20150811',
994                 'uploader': 'FlixMatrix',
995                 'uploader_id': 'FlixMatrixKaravan',
996                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
997                 'license': 'Standard YouTube License',
998             },
999             'params': {
1000                 'skip_download': True,
1001             },
1002             'skip': 'This video is not available.',
1003         },
1004         {
1005             # YouTube Red video with episode data
1006             'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1007             'info_dict': {
1008                 'id': 'iqKdEhx-dD4',
1009                 'ext': 'mp4',
1010                 'title': 'Isolation - Mind Field (Ep 1)',
1011                 'description': 'md5:46a29be4ceffa65b92d277b93f463c0f',
1012                 'duration': 2085,
1013                 'upload_date': '20170118',
1014                 'uploader': 'Vsauce',
1015                 'uploader_id': 'Vsauce',
1016                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
1017                 'series': 'Mind Field',
1018                 'season_number': 1,
1019                 'episode_number': 1,
1020             },
1021             'params': {
1022                 'skip_download': True,
1023             },
1024             'expected_warnings': [
1025                 'Skipping DASH manifest',
1026             ],
1027         },
1028         {
1029             # The following content has been identified by the YouTube community
1030             # as inappropriate or offensive to some audiences.
1031             'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1032             'info_dict': {
1033                 'id': '6SJNVb0GnPI',
1034                 'ext': 'mp4',
1035                 'title': 'Race Differences in Intelligence',
1036                 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1037                 'duration': 965,
1038                 'upload_date': '20140124',
1039                 'uploader': 'New Century Foundation',
1040                 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1041                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1042             },
1043             'params': {
1044                 'skip_download': True,
1045             },
1046         },
1047         {
1048             # itag 212
1049             'url': '1t24XAntNCY',
1050             'only_matching': True,
1051         },
1052         {
1053             # geo restricted to JP
1054             'url': 'sJL6WA-aGkQ',
1055             'only_matching': True,
1056         },
1057         {
1058             'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
1059             'only_matching': True,
1060         },
1061         {
1062             'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1063             'only_matching': True,
1064         },
1065         {
1066             # DRM protected
1067             'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1068             'only_matching': True,
1069         },
1070         {
1071             # Video with unsupported adaptive stream type formats
1072             'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1073             'info_dict': {
1074                 'id': 'Z4Vy8R84T1U',
1075                 'ext': 'mp4',
1076                 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1077                 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1078                 'duration': 433,
1079                 'upload_date': '20130923',
1080                 'uploader': 'Amelia Putri Harwita',
1081                 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1082                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1083                 'formats': 'maxcount:10',
1084             },
1085             'params': {
1086                 'skip_download': True,
1087                 'youtube_include_dash_manifest': False,
1088             },
1089         },
1090         {
1091             # artist and track fields should return non-null, per issue #20599
1092             'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1093             'info_dict': {
1094                 'id': 'MgNrAu2pzNs',
1095                 'ext': 'mp4',
1096                 'title': 'Voyeur Girl',
1097                 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1098                 'upload_date': '20190312',
1099                 'uploader': 'Various Artists - Topic',
1100                 'uploader_id': 'UCVWKBi1ELZn0QX2CBLSkiyw',
1101                 'artist': 'Stephen',
1102                 'track': 'Voyeur Girl',
1103                 'album': 'it\'s too much love to know my dear',
1104                 'release_date': '20190313',
1105                 'release_year': 2019,
1106             },
1107             'params': {
1108                 'skip_download': True,
1109             },
1110         },
1111         {
1112             # Retrieve the 'artist' field from the 'Artist:' line in the video
1113             # description when it is present on a YouTube Music video.
1114             # Some videos have release_date and no release_year
1115             # (release_year should then be derived from release_date):
1116             # https://github.com/ytdl-org/youtube-dl/pull/20742#issuecomment-485740932
1117             'url': 'https://www.youtube.com/watch?v=k0jLE7tTwjY',
1118             'info_dict': {
1119                 'id': 'k0jLE7tTwjY',
1120                 'ext': 'mp4',
1121                 'title': 'Latch Feat. Sam Smith',
1122                 'description': 'md5:3cb1e8101a7c85fcba9b4fb41b951335',
1123                 'upload_date': '20150110',
1124                 'uploader': 'Various Artists - Topic',
1125                 'uploader_id': 'UCNkEcmYdjrH4RqtNgh7BZ9w',
1126                 'artist': 'Disclosure',
1127                 'track': 'Latch Feat. Sam Smith',
1128                 'album': 'Latch Featuring Sam Smith',
1129                 'release_date': '20121008',
1130                 'release_year': 2012,
1131             },
1132             'params': {
1133                 'skip_download': True,
1134             },
1135         },
1136         {
1137             # handle multiple artists on youtube music video
1138             'url': 'https://www.youtube.com/watch?v=74qn0eJSjpA',
1139             'info_dict': {
1140                 'id': '74qn0eJSjpA',
1141                 'ext': 'mp4',
1142                 'title': 'Eastside',
1143                 'description': 'md5:290516bb73dcbfab0dcc4efe6c3de5f2',
1144                 'upload_date': '20180710',
1145                 'uploader': 'Benny Blanco - Topic',
1146                 'uploader_id': 'UCzqz_ksRu_WkIzmivMdIS7A',
1147                 'artist': 'benny blanco, Halsey, Khalid',
1148                 'track': 'Eastside',
1149                 'album': 'Eastside',
1150                 'release_date': '20180713',
1151                 'release_year': 2018,
1152             },
1153             'params': {
1154                 'skip_download': True,
1155             },
1156         },
1157         {
1158             # handle youtube music video with release_year and no release_date
1159             'url': 'https://www.youtube.com/watch?v=-hcAI0g-f5M',
1160             'info_dict': {
1161                 'id': '-hcAI0g-f5M',
1162                 'ext': 'mp4',
1163                 'title': 'Put It On Me',
1164                 'description': 'md5:93c55acc682ae7b0c668f2e34e1c069e',
1165                 'upload_date': '20180426',
1166                 'uploader': 'Matt Maeson - Topic',
1167                 'uploader_id': 'UCnEkIGqtGcQMLk73Kp-Q5LQ',
1168                 'artist': 'Matt Maeson',
1169                 'track': 'Put It On Me',
1170                 'album': 'The Hearse',
1171                 'release_date': None,
1172                 'release_year': 2018,
1173             },
1174             'params': {
1175                 'skip_download': True,
1176             },
1177         },
1178     ]
1179
1180     def __init__(self, *args, **kwargs):
1181         super(YoutubeIE, self).__init__(*args, **kwargs)
1182         self._player_cache = {}
1183
1184     def report_video_info_webpage_download(self, video_id):
1185         """Report attempt to download video info webpage."""
1186         self.to_screen('%s: Downloading video info webpage' % video_id)
1187
1188     def report_information_extraction(self, video_id):
1189         """Report attempt to extract video information."""
1190         self.to_screen('%s: Extracting video information' % video_id)
1191
1192     def report_unavailable_format(self, video_id, format):
1193         """Report that the requested format is not available."""
1194         self.to_screen('%s: Format %s not available' % (video_id, format))
1195
1196     def report_rtmp_download(self):
1197         """Indicate the download will use the RTMP protocol."""
1198         self.to_screen('RTMP download detected')
1199
1200     def _signature_cache_id(self, example_sig):
1201         """Return a string representation of a signature."""
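        # e.g. a signature of the form 'AAA.BBBB.CC' yields the id '3.4.2'
        # (the length of each dot-separated part); deciphering appears to depend
        # only on these lengths, so the id can safely key the function cache.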
1202         return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
1203
1204     def _extract_signature_function(self, video_id, player_url, example_sig):
1205         id_m = re.match(
1206             r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|(?:/[a-z]{2,3}_[A-Z]{2})?/base)?\.(?P<ext>[a-z]+)$',
1207             player_url)
1208         if not id_m:
1209             raise ExtractorError('Cannot identify player %r' % player_url)
1210         player_type = id_m.group('ext')
1211         player_id = id_m.group('id')
1212
1213         # Read from filesystem cache
1214         func_id = '%s_%s_%s' % (
1215             player_type, player_id, self._signature_cache_id(example_sig))
1216         assert os.path.basename(func_id) == func_id
1217
1218         cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
1219         if cache_spec is not None:
1220             return lambda s: ''.join(s[i] for i in cache_spec)
1221
1222         download_note = (
1223             'Downloading player %s' % player_url
1224             if self._downloader.params.get('verbose') else
1225             'Downloading %s player %s' % (player_type, player_id)
1226         )
1227         if player_type == 'js':
1228             code = self._download_webpage(
1229                 player_url, video_id,
1230                 note=download_note,
1231                 errnote='Download of %s failed' % player_url)
1232             res = self._parse_sig_js(code)
1233         elif player_type == 'swf':
1234             urlh = self._request_webpage(
1235                 player_url, video_id,
1236                 note=download_note,
1237                 errnote='Download of %s failed' % player_url)
1238             code = urlh.read()
1239             res = self._parse_sig_swf(code)
1240         else:
1241             assert False, 'Invalid player type %r' % player_type
1242
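        # Record the deciphering function as a character-reordering spec: apply
        # it to a string of distinct characters chr(0)..chr(len-1) of the same
        # length as the example signature and store the resulting index order,
        # so future signatures of this length can be deciphered without
        # re-running the JS/SWF interpreter.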
1243         test_string = ''.join(map(compat_chr, range(len(example_sig))))
1244         cache_res = res(test_string)
1245         cache_spec = [ord(c) for c in cache_res]
1246
1247         self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
1248         return res
1249
1250     def _print_sig_code(self, func, example_sig):
1251         def gen_sig_code(idxs):
1252             def _genslice(start, end, step):
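                # Render a run of indices as a Python slice expression, e.g.
                # _genslice(0, 5, 1) -> 's[:6]' and _genslice(10, 2, -1) -> 's[10:1:-1]'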
1253                 starts = '' if start == 0 else str(start)
1254                 ends = (':%d' % (end + step)) if end + step >= 0 else ':'
1255                 steps = '' if step == 1 else (':%d' % step)
1256                 return 's[%s%s%s]' % (starts, ends, steps)
1257
1258             step = None
1259             # Quell pyflakes warnings - start will be set when step is set
1260             start = '(Never used)'
1261             for i, prev in zip(idxs[1:], idxs[:-1]):
1262                 if step is not None:
1263                     if i - prev == step:
1264                         continue
1265                     yield _genslice(start, prev, step)
1266                     step = None
1267                     continue
1268                 if i - prev in [-1, 1]:
1269                     step = i - prev
1270                     start = prev
1271                     continue
1272                 else:
1273                     yield 's[%d]' % prev
1274             if step is None:
1275                 yield 's[%d]' % i
1276             else:
1277                 yield _genslice(start, i, step)
1278
1279         test_string = ''.join(map(compat_chr, range(len(example_sig))))
1280         cache_res = func(test_string)
1281         cache_spec = [ord(c) for c in cache_res]
1282         expr_code = ' + '.join(gen_sig_code(cache_spec))
1283         signature_id_tuple = '(%s)' % (
1284             ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
1285         code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
1286                 '    return %s\n') % (signature_id_tuple, expr_code)
1287         self.to_screen('Extracted signature function:\n' + code)
1288
1289     def _parse_sig_js(self, jscode):
1290         funcname = self._search_regex(
1291             (r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1292              r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
1293              r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*c\s*&&\s*d\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?(?P<sig>[a-zA-Z0-9$]+)\(',
1294              r'\bc\s*&&\s*d\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1295              r'\bc\s*&&\s*d\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
1296             jscode, 'Initial JS player signature function name', group='sig')
1297
1298         jsi = JSInterpreter(jscode)
1299         initial_function = jsi.extract_function(funcname)
1300         return lambda s: initial_function([s])
1301
1302     def _parse_sig_swf(self, file_contents):
1303         swfi = SWFInterpreter(file_contents)
1304         TARGET_CLASSNAME = 'SignatureDecipher'
1305         searched_class = swfi.extract_class(TARGET_CLASSNAME)
1306         initial_function = swfi.extract_function(searched_class, 'decipher')
1307         return lambda s: initial_function([s])
1308
1309     def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
1310         """Turn the encrypted s field into a working signature"""
1311
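        # Deciphering functions are cached per (player URL, signature length
        # pattern) pair, both in memory (self._player_cache) and on disk via
        # self._downloader.cache, so each player version is only downloaded and
        # parsed once.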
1312         if player_url is None:
1313             raise ExtractorError('Cannot decrypt signature without player_url')
1314
1315         if player_url.startswith('//'):
1316             player_url = 'https:' + player_url
1317         elif not re.match(r'https?://', player_url):
1318             player_url = compat_urlparse.urljoin(
1319                 'https://www.youtube.com', player_url)
1320         try:
1321             player_id = (player_url, self._signature_cache_id(s))
1322             if player_id not in self._player_cache:
1323                 func = self._extract_signature_function(
1324                     video_id, player_url, s
1325                 )
1326                 self._player_cache[player_id] = func
1327             func = self._player_cache[player_id]
1328             if self._downloader.params.get('youtube_print_sig_code'):
1329                 self._print_sig_code(func, s)
1330             return func(s)
1331         except Exception as e:
1332             tb = traceback.format_exc()
1333             raise ExtractorError(
1334                 'Signature extraction failed: ' + tb, cause=e)
1335
1336     def _get_subtitles(self, video_id, webpage):
1337         try:
1338             subs_doc = self._download_xml(
1339                 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
1340                 video_id, note=False)
1341         except ExtractorError as err:
1342             self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err))
1343             return {}
1344
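        # The response is an XML document whose <track> elements carry
        # 'lang_code' and 'name' attributes; one timedtext URL is built per
        # language and per format in self._SUBTITLE_FORMATS.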
1345         sub_lang_list = {}
1346         for track in subs_doc.findall('track'):
1347             lang = track.attrib['lang_code']
1348             if lang in sub_lang_list:
1349                 continue
1350             sub_formats = []
1351             for ext in self._SUBTITLE_FORMATS:
1352                 params = compat_urllib_parse_urlencode({
1353                     'lang': lang,
1354                     'v': video_id,
1355                     'fmt': ext,
1356                     'name': track.attrib['name'].encode('utf-8'),
1357                 })
1358                 sub_formats.append({
1359                     'url': 'https://www.youtube.com/api/timedtext?' + params,
1360                     'ext': ext,
1361                 })
1362             sub_lang_list[lang] = sub_formats
1363         if not sub_lang_list:
1364             self._downloader.report_warning('video doesn\'t have subtitles')
1365             return {}
1366         return sub_lang_list
1367
1368     def _get_ytplayer_config(self, video_id, webpage):
1369         patterns = (
1370             # User data may contain arbitrary character sequences that may affect
1371             # JSON extraction with regex, e.g. if it contains '};' the second
1372             # regex won't capture the whole JSON. Work around this by trying the more
1373             # specific regex first; proper quoted-string handling, to be implemented
1374             # in the future, will replace this workaround (see
1375             # https://github.com/ytdl-org/youtube-dl/issues/7468,
1376             # https://github.com/ytdl-org/youtube-dl/pull/7599)
1377             r';ytplayer\.config\s*=\s*({.+?});ytplayer',
1378             r';ytplayer\.config\s*=\s*({.+?});',
1379         )
1380         config = self._search_regex(
1381             patterns, webpage, 'ytplayer.config', default=None)
1382         if config:
1383             return self._parse_json(
1384                 uppercase_escape(config), video_id, fatal=False)
1385
1386     def _get_automatic_captions(self, video_id, webpage):
1387         """We need the webpage to get the captions URL; pass it as an
1388            argument to speed up the process."""
1389         self.to_screen('%s: Looking for automatic captions' % video_id)
1390         player_config = self._get_ytplayer_config(video_id, webpage)
1391         err_msg = 'Couldn\'t find automatic captions for %s' % video_id
1392         if not player_config:
1393             self._downloader.report_warning(err_msg)
1394             return {}
1395         try:
1396             args = player_config['args']
1397             caption_url = args.get('ttsurl')
1398             if caption_url:
1399                 timestamp = args['timestamp']
1400                 # We get the available subtitles
1401                 list_params = compat_urllib_parse_urlencode({
1402                     'type': 'list',
1403                     'tlangs': 1,
1404                     'asrs': 1,
1405                 })
1406                 list_url = caption_url + '&' + list_params
1407                 caption_list = self._download_xml(list_url, video_id)
1408                 original_lang_node = caption_list.find('track')
1409                 if original_lang_node is None:
1410                     self._downloader.report_warning('Video doesn\'t have automatic captions')
1411                     return {}
1412                 original_lang = original_lang_node.attrib['lang_code']
1413                 caption_kind = original_lang_node.attrib.get('kind', '')
1414
1415                 sub_lang_list = {}
1416                 for lang_node in caption_list.findall('target'):
1417                     sub_lang = lang_node.attrib['lang_code']
1418                     sub_formats = []
1419                     for ext in self._SUBTITLE_FORMATS:
1420                         params = compat_urllib_parse_urlencode({
1421                             'lang': original_lang,
1422                             'tlang': sub_lang,
1423                             'fmt': ext,
1424                             'ts': timestamp,
1425                             'kind': caption_kind,
1426                         })
1427                         sub_formats.append({
1428                             'url': caption_url + '&' + params,
1429                             'ext': ext,
1430                         })
1431                     sub_lang_list[sub_lang] = sub_formats
1432                 return sub_lang_list
1433
1434             def make_captions(sub_url, sub_langs):
1435                 parsed_sub_url = compat_urllib_parse_urlparse(sub_url)
1436                 caption_qs = compat_parse_qs(parsed_sub_url.query)
1437                 captions = {}
1438                 for sub_lang in sub_langs:
1439                     sub_formats = []
1440                     for ext in self._SUBTITLE_FORMATS:
1441                         caption_qs.update({
1442                             'tlang': [sub_lang],
1443                             'fmt': [ext],
1444                         })
1445                         sub_url = compat_urlparse.urlunparse(parsed_sub_url._replace(
1446                             query=compat_urllib_parse_urlencode(caption_qs, True)))
1447                         sub_formats.append({
1448                             'url': sub_url,
1449                             'ext': ext,
1450                         })
1451                     captions[sub_lang] = sub_formats
1452                 return captions
1453
1454             # New captions format as of 22.06.2017
1455             player_response = args.get('player_response')
1456             if player_response and isinstance(player_response, compat_str):
1457                 player_response = self._parse_json(
1458                     player_response, video_id, fatal=False)
1459                 if player_response:
1460                     renderer = player_response['captions']['playerCaptionsTracklistRenderer']
1461                     base_url = renderer['captionTracks'][0]['baseUrl']
1462                     sub_lang_list = []
1463                     for lang in renderer['translationLanguages']:
1464                         lang_code = lang.get('languageCode')
1465                         if lang_code:
1466                             sub_lang_list.append(lang_code)
1467                     return make_captions(base_url, sub_lang_list)
1468
1469             # Some videos don't provide ttsurl but rather caption_tracks and
1470             # caption_translation_languages (e.g. 20LmZk1hakA)
1471             # Not used anymore as of 22.06.2017
1472             caption_tracks = args['caption_tracks']
1473             caption_translation_languages = args['caption_translation_languages']
1474             caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
1475             sub_lang_list = []
1476             for lang in caption_translation_languages.split(','):
1477                 lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang))
1478                 sub_lang = lang_qs.get('lc', [None])[0]
1479                 if sub_lang:
1480                     sub_lang_list.append(sub_lang)
1481             return make_captions(caption_url, sub_lang_list)
1482         # An extractor error can be raised by the download process if there are
1483         # no automatic captions but there are subtitles
1484         except (KeyError, IndexError, ExtractorError):
1485             self._downloader.report_warning(err_msg)
1486             return {}
1487
1488     def _mark_watched(self, video_id, video_info, player_response):
1489         playback_url = url_or_none(try_get(
1490             player_response,
1491             lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']) or try_get(
1492             video_info, lambda x: x['videostats_playback_base_url'][0]))
1493         if not playback_url:
1494             return
1495         parsed_playback_url = compat_urlparse.urlparse(playback_url)
1496         qs = compat_urlparse.parse_qs(parsed_playback_url.query)
1497
1498         # cpn generation algorithm is reverse engineered from base.js.
1499         # In fact it works even with dummy cpn.
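        # The alphabet below is the 64-character base64url set; each of the 16
        # positions is picked at random by masking randint(0, 256) down to the
        # 0-63 index range.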
1500         CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
1501         cpn = ''.join(CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(16))
1502
1503         qs.update({
1504             'ver': ['2'],
1505             'cpn': [cpn],
1506         })
1507         playback_url = compat_urlparse.urlunparse(
1508             parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
1509
1510         self._download_webpage(
1511             playback_url, video_id, 'Marking watched',
1512             'Unable to mark watched', fatal=False)
1513
1514     @staticmethod
1515     def _extract_urls(webpage):
1516         # Embedded YouTube player
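        # e.g. markup such as
        #   <iframe src="https://www.youtube.com/embed/BaW_jenozKc"></iframe>
        #   <embed src="//www.youtube-nocookie.com/v/BaW_jenozKc?version=3">
        # is matched and the embedded URL is captured.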
1517         entries = [
1518             unescapeHTML(mobj.group('url'))
1519             for mobj in re.finditer(r'''(?x)
1520             (?:
1521                 <iframe[^>]+?src=|
1522                 data-video-url=|
1523                 <embed[^>]+?src=|
1524                 embedSWF\(?:\s*|
1525                 <object[^>]+data=|
1526                 new\s+SWFObject\(
1527             )
1528             (["\'])
1529                 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1530                 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
1531             \1''', webpage)]
1532
1533         # lazyYT YouTube embed
1534         entries.extend(list(map(
1535             unescapeHTML,
1536             re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))
1537
1538         # Wordpress "YouTube Video Importer" plugin
1539         matches = re.findall(r'''(?x)<div[^>]+
1540             class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
1541             data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
1542         entries.extend(m[-1] for m in matches)
1543
1544         return entries
1545
1546     @staticmethod
1547     def _extract_url(webpage):
1548         urls = YoutubeIE._extract_urls(webpage)
1549         return urls[0] if urls else None
1550
1551     @classmethod
1552     def extract_id(cls, url):
1553         mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
1554         if mobj is None:
1555             raise ExtractorError('Invalid URL: %s' % url)
1556         video_id = mobj.group(2)
1557         return video_id
1558
1559     def _extract_annotations(self, video_id):
1560         url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id
1561         return self._download_webpage(url, video_id, note='Searching for annotations.', errnote='Unable to download video annotations.')
1562
1563     @staticmethod
1564     def _extract_chapters(description, duration):
1565         if not description:
1566             return None
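        # Chapter lines in the HTML description look roughly like
        #   Intro <a href="#" onclick="yt.www.watch.player.seekTo(0,0);return false;">0:00</a><br />
        # i.e. a seekTo link whose text is the timestamp; the surrounding text,
        # with the link stripped, becomes the chapter title.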
1567         chapter_lines = re.findall(
1568             r'(?:^|<br\s*/>)([^<]*<a[^>]+onclick=["\']yt\.www\.watch\.player\.seekTo[^>]+>(\d{1,2}:\d{1,2}(?::\d{1,2})?)</a>[^>]*)(?=$|<br\s*/>)',
1569             description)
1570         if not chapter_lines:
1571             return None
1572         chapters = []
1573         for next_num, (chapter_line, time_point) in enumerate(
1574                 chapter_lines, start=1):
1575             start_time = parse_duration(time_point)
1576             if start_time is None:
1577                 continue
1578             if start_time > duration:
1579                 break
1580             end_time = (duration if next_num == len(chapter_lines)
1581                         else parse_duration(chapter_lines[next_num][1]))
1582             if end_time is None:
1583                 continue
1584             if end_time > duration:
1585                 end_time = duration
1586             if start_time > end_time:
1587                 break
1588             chapter_title = re.sub(
1589                 r'<a[^>]+>[^<]+</a>', '', chapter_line).strip(' \t-')
1590             chapter_title = re.sub(r'\s+', ' ', chapter_title)
1591             chapters.append({
1592                 'start_time': start_time,
1593                 'end_time': end_time,
1594                 'title': chapter_title,
1595             })
1596         return chapters
1597
1598     def _real_extract(self, url):
1599         url, smuggled_data = unsmuggle_url(url, {})
1600
1601         proto = (
1602             'http' if self._downloader.params.get('prefer_insecure', False)
1603             else 'https')
1604
1605         start_time = None
1606         end_time = None
1607         parsed_url = compat_urllib_parse_urlparse(url)
1608         for component in [parsed_url.fragment, parsed_url.query]:
1609             query = compat_parse_qs(component)
1610             if start_time is None and 't' in query:
1611                 start_time = parse_duration(query['t'][0])
1612             if start_time is None and 'start' in query:
1613                 start_time = parse_duration(query['start'][0])
1614             if end_time is None and 'end' in query:
1615                 end_time = parse_duration(query['end'][0])
1616
1617         # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1618         mobj = re.search(self._NEXT_URL_RE, url)
1619         if mobj:
1620             url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
1621         video_id = self.extract_id(url)
1622
1623         # Get video webpage
1624         url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
1625         video_webpage = self._download_webpage(url, video_id)
1626
1627         # Attempt to extract SWF player URL
1628         mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
1629         if mobj is not None:
1630             player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1631         else:
1632             player_url = None
1633
1634         dash_mpds = []
1635
1636         def add_dash_mpd(video_info):
1637             dash_mpd = video_info.get('dashmpd')
1638             if dash_mpd and dash_mpd[0] not in dash_mpds:
1639                 dash_mpds.append(dash_mpd[0])
1640
1641         def add_dash_mpd_pr(pl_response):
1642             dash_mpd = url_or_none(try_get(
1643                 pl_response, lambda x: x['streamingData']['dashManifestUrl'],
1644                 compat_str))
1645             if dash_mpd and dash_mpd not in dash_mpds:
1646                 dash_mpds.append(dash_mpd)
1647
1648         is_live = None
1649         view_count = None
1650
1651         def extract_view_count(v_info):
1652             return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
1653
1654         player_response = {}
1655
1656         # Get video info
1657         embed_webpage = None
1658         if re.search(r'player-age-gate-content">', video_webpage) is not None:
1659             age_gate = True
1660             # We simulate the access to the video from www.youtube.com/v/{video_id}
1661             # this page can be viewed without logging into YouTube
1662             url = proto + '://www.youtube.com/embed/%s' % video_id
1663             embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
1664             data = compat_urllib_parse_urlencode({
1665                 'video_id': video_id,
1666                 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1667                 'sts': self._search_regex(
1668                     r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
1669             })
1670             video_info_url = proto + '://www.youtube.com/get_video_info?' + data
1671             video_info_webpage = self._download_webpage(
1672                 video_info_url, video_id,
1673                 note='Refetching age-gated info webpage',
1674                 errnote='unable to download video info webpage')
1675             video_info = compat_parse_qs(video_info_webpage)
1676             add_dash_mpd(video_info)
1677         else:
1678             age_gate = False
1679             video_info = None
1680             sts = None
1681             # Try looking directly into the video webpage
1682             ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
1683             if ytplayer_config:
1684                 args = ytplayer_config['args']
1685                 if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
1686                     # Convert to the same format returned by compat_parse_qs
1687                     video_info = dict((k, [v]) for k, v in args.items())
1688                     add_dash_mpd(video_info)
1689                 # Rental video is not rented but preview is available (e.g.
1690                 # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
1691                 # https://github.com/ytdl-org/youtube-dl/issues/10532)
1692                 if not video_info and args.get('ypc_vid'):
1693                     return self.url_result(
1694                         args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
1695                 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1696                     is_live = True
1697                 sts = ytplayer_config.get('sts')
1698                 if not player_response:
1699                     pl_response = str_or_none(args.get('player_response'))
1700                     if pl_response:
1701                         pl_response = self._parse_json(pl_response, video_id, fatal=False)
1702                         if isinstance(pl_response, dict):
1703                             player_response = pl_response
1704             if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
1705                 add_dash_mpd_pr(player_response)
1706                 # We also try looking in get_video_info since it may contain a different dashmpd
1707                 # URL that points to a DASH manifest with a possibly different itag set (some itags
1708                 # are missing from the DASH manifest pointed to by the webpage's dashmpd, some from
1709                 # the DASH manifest pointed to by get_video_info's dashmpd).
1710                 # The general idea is to take the union of the itags of both DASH manifests (for an
1711                 # example of a video with such 'manifest behavior' see https://github.com/ytdl-org/youtube-dl/issues/6093)
1712                 self.report_video_info_webpage_download(video_id)
1713                 for el in ('info', 'embedded', 'detailpage', 'vevo', ''):
1714                     query = {
1715                         'video_id': video_id,
1716                         'ps': 'default',
1717                         'eurl': '',
1718                         'gl': 'US',
1719                         'hl': 'en',
1720                     }
1721                     if el:
1722                         query['el'] = el
1723                     if sts:
1724                         query['sts'] = sts
1725                     video_info_webpage = self._download_webpage(
1726                         '%s://www.youtube.com/get_video_info' % proto,
1727                         video_id, note=False,
1728                         errnote='unable to download video info webpage',
1729                         fatal=False, query=query)
1730                     if not video_info_webpage:
1731                         continue
1732                     get_video_info = compat_parse_qs(video_info_webpage)
1733                     if not player_response:
1734                         pl_response = get_video_info.get('player_response', [None])[0]
1735                         if isinstance(pl_response, dict):
1736                             player_response = pl_response
1737                             add_dash_mpd_pr(player_response)
1738                     add_dash_mpd(get_video_info)
1739                     if view_count is None:
1740                         view_count = extract_view_count(get_video_info)
1741                     if not video_info:
1742                         video_info = get_video_info
1743                     get_token = get_video_info.get('token') or get_video_info.get('account_playback_token')
1744                     if get_token:
1745                         # Different get_video_info requests may report different results, e.g.
1746                         # some may report video unavailability, but some may serve it without
1747                         # any complaint (see https://github.com/ytdl-org/youtube-dl/issues/7362,
1748                         # the original webpage as well as el=info and el=embedded get_video_info
1749                         # requests report video unavailability due to geo restriction while
1750                         # el=detailpage succeeds and returns valid data). This is probably
1751                         # due to YouTube measures against IP ranges of hosting providers.
1752                         # Work around this by preferring the first successful video_info that
1753                         # contains the token if no such video_info has been found yet.
1754                         token = video_info.get('token') or video_info.get('account_playback_token')
1755                         if not token:
1756                             video_info = get_video_info
1757                         break
1758
1759         def extract_unavailable_message():
1760             return self._html_search_regex(
1761                 r'(?s)<h1[^>]+id="unavailable-message"[^>]*>(.+?)</h1>',
1762                 video_webpage, 'unavailable message', default=None)
1763
1764         if not video_info:
1765             unavailable_message = extract_unavailable_message()
1766             if not unavailable_message:
1767                 unavailable_message = 'Unable to extract video data'
1768             raise ExtractorError(
1769                 'YouTube said: %s' % unavailable_message, expected=True, video_id=video_id)
1770
1771         token = video_info.get('token') or video_info.get('account_playback_token')
1772         if not token:
1773             if 'reason' in video_info:
1774                 if 'The uploader has not made this video available in your country.' in video_info['reason']:
1775                     regions_allowed = self._html_search_meta(
1776                         'regionsAllowed', video_webpage, default=None)
1777                     countries = regions_allowed.split(',') if regions_allowed else None
1778                     self.raise_geo_restricted(
1779                         msg=video_info['reason'][0], countries=countries)
1780                 reason = video_info['reason'][0]
1781                 if 'Invalid parameters' in reason:
1782                     unavailable_message = extract_unavailable_message()
1783                     if unavailable_message:
1784                         reason = unavailable_message
1785                 raise ExtractorError(
1786                     'YouTube said: %s' % reason,
1787                     expected=True, video_id=video_id)
1788             else:
1789                 raise ExtractorError(
1790                     '"token" parameter not in video info for unknown reason',
1791                     video_id=video_id)
1792
1793         if video_info.get('license_info'):
1794             raise ExtractorError('This video is DRM protected.', expected=True)
1795
1796         video_details = try_get(
1797             player_response, lambda x: x['videoDetails'], dict) or {}
1798
1799         # title
1800         if 'title' in video_info:
1801             video_title = video_info['title'][0]
1802         elif video_details.get('title'):
1803             video_title = video_details['title']
1804         else:
1805             self._downloader.report_warning('Unable to extract video title')
1806             video_title = '_'
1807
1808         # description
1809         description_original = video_description = get_element_by_id("eow-description", video_webpage)
1810         if video_description:
1811
1812             def replace_url(m):
1813                 redir_url = compat_urlparse.urljoin(url, m.group(1))
1814                 parsed_redir_url = compat_urllib_parse_urlparse(redir_url)
1815                 if re.search(r'^(?:www\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)$', parsed_redir_url.netloc) and parsed_redir_url.path == '/redirect':
1816                     qs = compat_parse_qs(parsed_redir_url.query)
1817                     q = qs.get('q')
1818                     if q and q[0]:
1819                         return q[0]
1820                 return redir_url
1821
1822             description_original = video_description = re.sub(r'''(?x)
1823                 <a\s+
1824                     (?:[a-zA-Z-]+="[^"]*"\s+)*?
1825                     (?:title|href)="([^"]+)"\s+
1826                     (?:[a-zA-Z-]+="[^"]*"\s+)*?
1827                     class="[^"]*"[^>]*>
1828                 [^<]+\.{3}\s*
1829                 </a>
1830             ''', replace_url, video_description)
1831             video_description = clean_html(video_description)
1832         else:
1833             fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
1834             if fd_mobj:
1835                 video_description = unescapeHTML(fd_mobj.group(1))
1836             else:
1837                 video_description = ''
1838
1839         if not smuggled_data.get('force_singlefeed', False):
1840             if not self._downloader.params.get('noplaylist'):
1841                 multifeed_metadata_list = try_get(
1842                     player_response,
1843                     lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
1844                     compat_str) or try_get(
1845                     video_info, lambda x: x['multifeed_metadata_list'][0], compat_str)
1846                 if multifeed_metadata_list:
1847                     entries = []
1848                     feed_ids = []
1849                     for feed in multifeed_metadata_list.split(','):
1850                         # Unquoting should take place before splitting on comma (,) since textual
1851                         # fields may contain commas as well (see
1852                         # https://github.com/ytdl-org/youtube-dl/issues/8536)
1853                         feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
1854                         entries.append({
1855                             '_type': 'url_transparent',
1856                             'ie_key': 'Youtube',
1857                             'url': smuggle_url(
1858                                 '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
1859                                 {'force_singlefeed': True}),
1860                             'title': '%s (%s)' % (video_title, feed_data['title'][0]),
1861                         })
1862                         feed_ids.append(feed_data['id'][0])
1863                     self.to_screen(
1864                         'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
1865                         % (', '.join(feed_ids), video_id))
1866                     return self.playlist_result(entries, video_id, video_title, video_description)
1867             else:
1868                 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
1869
1870         if view_count is None:
1871             view_count = extract_view_count(video_info)
1872         if view_count is None and video_details:
1873             view_count = int_or_none(video_details.get('viewCount'))
1874
1875         # Check for "rental" videos
1876         if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
1877             raise ExtractorError('"rental" videos not supported. See https://github.com/ytdl-org/youtube-dl/issues/359 for more information.', expected=True)
1878
1879         def _extract_filesize(media_url):
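            # The content length appears in media URLs either as a query
            # parameter ('clen=123456') or as a path segment ('/clen/123456/'),
            # hence the [=/] in the pattern below.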
1880             return int_or_none(self._search_regex(
1881                 r'\bclen[=/](\d+)', media_url, 'filesize', default=None))
1882
1883         if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1884             self.report_rtmp_download()
1885             formats = [{
1886                 'format_id': '_rtmp',
1887                 'protocol': 'rtmp',
1888                 'url': video_info['conn'][0],
1889                 'player_url': player_url,
1890             }]
1891         elif not is_live and (len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1):
1892             encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
1893             if 'rtmpe%3Dyes' in encoded_url_map:
1894                 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/ytdl-org/youtube-dl/issues/343 for more information.', expected=True)
1895             formats_spec = {}
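            # fmt_list is a comma-separated list of entries of the form
            # '<itag>/<width>x<height>/...'; use it to map itags to resolutions.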
1896             fmt_list = video_info.get('fmt_list', [''])[0]
1897             if fmt_list:
1898                 for fmt in fmt_list.split(','):
1899                     spec = fmt.split('/')
1900                     if len(spec) > 1:
1901                         width_height = spec[1].split('x')
1902                         if len(width_height) == 2:
1903                             formats_spec[spec[0]] = {
1904                                 'resolution': spec[1],
1905                                 'width': int_or_none(width_height[0]),
1906                                 'height': int_or_none(width_height[1]),
1907                             }
1908             q = qualities(['small', 'medium', 'hd720'])
1909             streaming_formats = try_get(player_response, lambda x: x['streamingData']['formats'], list)
1910             if streaming_formats:
1911                 for fmt in streaming_formats:
1912                     itag = str_or_none(fmt.get('itag'))
1913                     if not itag:
1914                         continue
1915                     quality = fmt.get('quality')
1916                     quality_label = fmt.get('qualityLabel') or quality
1917                     formats_spec[itag] = {
1918                         'asr': int_or_none(fmt.get('audioSampleRate')),
1919                         'filesize': int_or_none(fmt.get('contentLength')),
1920                         'format_note': quality_label,
1921                         'fps': int_or_none(fmt.get('fps')),
1922                         'height': int_or_none(fmt.get('height')),
1923                         'quality': q(quality),
1924                         # bitrate for itag 43 is always 2147483647
1925                         'tbr': float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000) if itag != '43' else None,
1926                         'width': int_or_none(fmt.get('width')),
1927                     }
1928             formats = []
1929             for url_data_str in encoded_url_map.split(','):
1930                 url_data = compat_parse_qs(url_data_str)
1931                 if 'itag' not in url_data or 'url' not in url_data:
1932                     continue
1933                 stream_type = int_or_none(try_get(url_data, lambda x: x['stream_type'][0]))
1934                 # Unsupported FORMAT_STREAM_TYPE_OTF
1935                 if stream_type == 3:
1936                     continue
1937                 format_id = url_data['itag'][0]
1938                 url = url_data['url'][0]
1939
1940                 if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
1941                     ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
1942                     jsplayer_url_json = self._search_regex(
1943                         ASSETS_RE,
1944                         embed_webpage if age_gate else video_webpage,
1945                         'JS player URL (1)', default=None)
1946                     if not jsplayer_url_json and not age_gate:
1947                         # We need the embed website after all
1948                         if embed_webpage is None:
1949                             embed_url = proto + '://www.youtube.com/embed/%s' % video_id
1950                             embed_webpage = self._download_webpage(
1951                                 embed_url, video_id, 'Downloading embed webpage')
1952                         jsplayer_url_json = self._search_regex(
1953                             ASSETS_RE, embed_webpage, 'JS player URL')
1954
1955                     player_url = json.loads(jsplayer_url_json)
1956                     if player_url is None:
1957                         player_url_json = self._search_regex(
1958                             r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
1959                             video_webpage, 'age gate player URL')
1960                         player_url = json.loads(player_url_json)
1961
1962                 if 'sig' in url_data:
1963                     url += '&signature=' + url_data['sig'][0]
1964                 elif 's' in url_data:
1965                     encrypted_sig = url_data['s'][0]
1966
1967                     if self._downloader.params.get('verbose'):
1968                         if player_url is None:
1969                             player_version = 'unknown'
1970                             player_desc = 'unknown'
1971                         else:
1972                             if player_url.endswith('swf'):
1973                                 player_version = self._search_regex(
1974                                     r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
1975                                     'flash player', fatal=False)
1976                                 player_desc = 'flash player %s' % player_version
1977                             else:
1978                                 player_version = self._search_regex(
1979                                     [r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js',
1980                                      r'(?:www|player(?:_ias)?)-([^/]+)(?:/[a-z]{2,3}_[A-Z]{2})?/base\.js'],
1981                                     player_url,
1982                                     'html5 player', fatal=False)
1983                                 player_desc = 'html5 player %s' % player_version
1984
1985                         parts_sizes = self._signature_cache_id(encrypted_sig)
1986                         self.to_screen('{%s} signature length %s, %s' %
1987                                        (format_id, parts_sizes, player_desc))
1988
1989                     signature = self._decrypt_signature(
1990                         encrypted_sig, video_id, player_url, age_gate)
1991                     url += '&signature=' + signature
1992                 if 'ratebypass' not in url:
1993                     url += '&ratebypass=yes'
1994
1995                 dct = {
1996                     'format_id': format_id,
1997                     'url': url,
1998                     'player_url': player_url,
1999                 }
2000                 if format_id in self._formats:
2001                     dct.update(self._formats[format_id])
2002                 if format_id in formats_spec:
2003                     dct.update(formats_spec[format_id])
2004
2005                 # Some itags are not included in the DASH manifest, so the corresponding formats
2006                 # will lack metadata (see https://github.com/ytdl-org/youtube-dl/pull/5993).
2007                 # Try to extract metadata from the url_encoded_fmt_stream_map entry instead.
2008                 mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
2009                 width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
2010
2011                 filesize = int_or_none(url_data.get(
2012                     'clen', [None])[0]) or _extract_filesize(url)
2013
2014                 quality = url_data.get('quality', [None])[0]
2015
2016                 more_fields = {
2017                     'filesize': filesize,
2018                     'tbr': float_or_none(url_data.get('bitrate', [None])[0], 1000),
2019                     'width': width,
2020                     'height': height,
2021                     'fps': int_or_none(url_data.get('fps', [None])[0]),
2022                     'format_note': url_data.get('quality_label', [None])[0] or quality,
2023                     'quality': q(quality),
2024                 }
2025                 for key, value in more_fields.items():
2026                     if value:
2027                         dct[key] = value
2028                 type_ = url_data.get('type', [None])[0]
2029                 if type_:
2030                     type_split = type_.split(';')
2031                     kind_ext = type_split[0].split('/')
2032                     if len(kind_ext) == 2:
2033                         kind, _ = kind_ext
2034                         dct['ext'] = mimetype2ext(type_split[0])
2035                         if kind in ('audio', 'video'):
2036                             codecs = None
2037                             for mobj in re.finditer(
2038                                     r'(?P<key>[a-zA-Z_-]+)=(?P<quote>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):
2039                                 if mobj.group('key') == 'codecs':
2040                                     codecs = mobj.group('val')
2041                                     break
2042                             if codecs:
2043                                 dct.update(parse_codecs(codecs))
2044                 if dct.get('acodec') == 'none' or dct.get('vcodec') == 'none':
2045                     dct['downloader_options'] = {
2046                         # Youtube throttles chunks >~10M
2047                         'http_chunk_size': 10485760,
2048                     }
2049                 formats.append(dct)
2050         else:
2051             manifest_url = (
2052                 url_or_none(try_get(
2053                     player_response,
2054                     lambda x: x['streamingData']['hlsManifestUrl'],
2055                     compat_str)) or
2056                 url_or_none(try_get(
2057                     video_info, lambda x: x['hlsvp'][0], compat_str)))
2058             if manifest_url:
2059                 formats = []
2060                 m3u8_formats = self._extract_m3u8_formats(
2061                     manifest_url, video_id, 'mp4', fatal=False)
2062                 for a_format in m3u8_formats:
2063                     itag = self._search_regex(
2064                         r'/itag/(\d+)/', a_format['url'], 'itag', default=None)
2065                     if itag:
2066                         a_format['format_id'] = itag
2067                         if itag in self._formats:
2068                             dct = self._formats[itag].copy()
2069                             dct.update(a_format)
2070                             a_format = dct
2071                     a_format['player_url'] = player_url
2072                     # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
2073                     a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
2074                     formats.append(a_format)
2075             else:
2076                 error_message = clean_html(video_info.get('reason', [None])[0])
2077                 if not error_message:
2078                     error_message = extract_unavailable_message()
2079                 if error_message:
2080                     raise ExtractorError(error_message, expected=True)
2081                 raise ExtractorError('no conn, hlsvp, hlsManifestUrl or url_encoded_fmt_stream_map information found in video info')
2082
2083         # uploader
2084         video_uploader = try_get(
2085             video_info, lambda x: x['author'][0],
2086             compat_str) or str_or_none(video_details.get('author'))
2087         if video_uploader:
2088             video_uploader = compat_urllib_parse_unquote_plus(video_uploader)
2089         else:
2090             self._downloader.report_warning('unable to extract uploader name')
2091
2092         # uploader_id
2093         video_uploader_id = None
2094         video_uploader_url = None
2095         mobj = re.search(
2096             r'<link itemprop="url" href="(?P<uploader_url>https?://www\.youtube\.com/(?:user|channel)/(?P<uploader_id>[^"]+))">',
2097             video_webpage)
2098         if mobj is not None:
2099             video_uploader_id = mobj.group('uploader_id')
2100             video_uploader_url = mobj.group('uploader_url')
2101         else:
2102             self._downloader.report_warning('unable to extract uploader nickname')
2103
2104         channel_id = self._html_search_meta(
2105             'channelId', video_webpage, 'channel id')
2106         channel_url = 'http://www.youtube.com/channel/%s' % channel_id if channel_id else None
2107
2108         # thumbnail image
2109         # We try first to get a high quality image:
2110         m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
2111                             video_webpage, re.DOTALL)
2112         if m_thumb is not None:
2113             video_thumbnail = m_thumb.group(1)
2114         elif 'thumbnail_url' not in video_info:
2115             self._downloader.report_warning('unable to extract video thumbnail')
2116             video_thumbnail = None
2117         else:   # don't panic if we can't find it
2118             video_thumbnail = compat_urllib_parse_unquote_plus(video_info['thumbnail_url'][0])
2119
2120         # upload date
2121         upload_date = self._html_search_meta(
2122             'datePublished', video_webpage, 'upload date', default=None)
2123         if not upload_date:
2124             upload_date = self._search_regex(
2125                 [r'(?s)id="eow-date.*?>(.*?)</span>',
2126                  r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'],
2127                 video_webpage, 'upload date', default=None)
2128         upload_date = unified_strdate(upload_date)
2129
2130         video_license = self._html_search_regex(
2131             r'<h4[^>]+class="title"[^>]*>\s*License\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li',
2132             video_webpage, 'license', default=None)
2133
2134         m_music = re.search(
2135             r'''(?x)
2136                 <h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*
2137                 <ul[^>]*>\s*
2138                 <li>(?P<title>.+?)
2139                 by (?P<creator>.+?)
2140                 (?:
2141                     \(.+?\)|
2142                     <a[^>]*
2143                         (?:
2144                             \bhref=["\']/red[^>]*>|             # drop possible
2145                             >\s*Listen ad-free with YouTube Red # YouTube Red ad
2146                         )
2147                     .*?
2148                 )?</li
2149             ''',
2150             video_webpage)
2151         if m_music:
2152             video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
2153             video_creator = clean_html(m_music.group('creator'))
2154         else:
2155             video_alt_title = video_creator = None
2156
2157         def extract_meta(field):
2158             return self._html_search_regex(
2159                 r'<h4[^>]+class="title"[^>]*>\s*%s\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li>\s*' % field,
2160                 video_webpage, field, default=None)
2161
2162         track = extract_meta('Song')
2163         artist = extract_meta('Artist')
2164         album = None
2165         release_date = None
2166         release_year = None
2167
2168         description_info = video_description.split('\n\n')
2169         # If the description of the video has the youtube music auto-generated format, extract additional info
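        # The parser below assumes the auto-generated description layout used by
        # YouTube Music, i.e. double-newline-separated blocks roughly like:
        #
        #     Provided to YouTube by <label>
        #
        #     <track> · <artist>[ · <artist> ...]
        #
        #     <album>
        #
        #     ℗ <year> <label>
        #
        #     Released on: YYYY-MM-DD
        #
        #     ... (an 'Artist: <name>' line may appear in the second-to-last block)
        #
        #     Auto-generated by YouTube.
        #
        # so the blocks are indexed positionally below.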
2170         if len(description_info) >= 5 and description_info[-1] == 'Auto-generated by YouTube.':
2171             track_artist = description_info[1].split(' · ')
2172             if len(track_artist) >= 2:
2173                 if track is None:
2174                     track = track_artist[0]
2175                 if artist is None:
2176                     artist = re.search(r'Artist: ([^\n]+)', description_info[-2])
2177                     if artist:
2178                         artist = artist.group(1)
2179                     if artist is None:
2180                         artist = track_artist[1]
2181                         # handle multiple artists
2182                         if len(track_artist) > 2:
2183                             for i in range(2, len(track_artist)):
2184                                 artist += ', %s' % track_artist[i]
2185             release_year = re.search(r'℗ ([0-9]+)', video_description)
2186             if release_year:
2187                 release_year = int_or_none(release_year.group(1))
2188             album = description_info[2]
2189             if description_info[4].startswith('Released on: '):
2190                 release_date = description_info[4].split(': ')[1].replace('-', '')
2191                 # extract release_year from release_date if necessary
2192                 if release_year is None:
2193                     release_year = int_or_none(release_date[0:4])
2194
2195         m_episode = re.search(
2196             r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
2197             video_webpage)
2198         if m_episode:
2199             series = unescapeHTML(m_episode.group('series'))
2200             season_number = int(m_episode.group('season'))
2201             episode_number = int(m_episode.group('episode'))
2202         else:
2203             series = season_number = episode_number = None
2204
2205         m_cat_container = self._search_regex(
2206             r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
2207             video_webpage, 'categories', default=None)
2208         if m_cat_container:
2209             category = self._html_search_regex(
2210                 r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
2211                 default=None)
2212             video_categories = None if category is None else [category]
2213         else:
2214             video_categories = None
2215
2216         video_tags = [
2217             unescapeHTML(m.group('content'))
2218             for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
2219
2220         def _extract_count(count_name):
2221             return str_to_int(self._search_regex(
2222                 r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>'
2223                 % re.escape(count_name),
2224                 video_webpage, count_name, default=None))
2225
2226         like_count = _extract_count('like')
2227         dislike_count = _extract_count('dislike')
2228
2229         if view_count is None:
2230             view_count = str_to_int(self._search_regex(
2231                 r'<[^>]+class=["\']watch-view-count[^>]+>\s*([\d,\s]+)', video_webpage,
2232                 'view count', default=None))
2233
2234         # subtitles
2235         video_subtitles = self.extract_subtitles(video_id, video_webpage)
2236         automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
2237
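             # Prefer length_seconds from video_info, then video_details'
             # lengthSeconds, then fall back to the duration meta tag in the page.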
2238         video_duration = try_get(
2239             video_info, lambda x: int_or_none(x['length_seconds'][0]))
2240         if not video_duration:
2241             video_duration = int_or_none(video_details.get('lengthSeconds'))
2242         if not video_duration:
2243             video_duration = parse_duration(self._html_search_meta(
2244                 'duration', video_webpage, 'video duration'))
2245
2246         # annotations
2247         video_annotations = None
2248         if self._downloader.params.get('writeannotations', False):
2249             video_annotations = self._extract_annotations(video_id)
2250
2251         chapters = self._extract_chapters(description_original, video_duration)
2252
2253         # Look for the DASH manifest
2254         if self._downloader.params.get('youtube_include_dash_manifest', True):
2255             dash_mpd_fatal = True
2256             for mpd_url in dash_mpds:
2257                 dash_formats = {}
2258                 try:
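                          # A DASH manifest URL may carry an encrypted signature in an
                          # /s/<sig> path component; rewrite it to /signature/<decrypted>
                          # before fetching the manifest.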
2259                     def decrypt_sig(mobj):
2260                         s = mobj.group(1)
2261                         dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
2262                         return '/signature/%s' % dec_s
2263
2264                     mpd_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, mpd_url)
2265
2266                     for df in self._extract_mpd_formats(
2267                             mpd_url, video_id, fatal=dash_mpd_fatal,
2268                             formats_dict=self._formats):
2269                         if not df.get('filesize'):
2270                             df['filesize'] = _extract_filesize(df['url'])
2271                         # Do not overwrite a DASH format found in a previous DASH manifest
2272                         if df['format_id'] not in dash_formats:
2273                             dash_formats[df['format_id']] = df
2274                         # Additional DASH manifests may end up in HTTP Error 403, therefore
2275                         # allow them to fail without a bug report message if some previous
2276                         # DASH manifest already succeeded. This is a temporary workaround to
2277                         # reduce the burst of bug reports until we figure out the reason and
2278                         # whether it can be fixed at all.
2279                         dash_mpd_fatal = False
2280                 except (ExtractorError, KeyError) as e:
2281                     self.report_warning(
2282                         'Skipping DASH manifest: %r' % e, video_id)
2283                 if dash_formats:
2284                     # Remove the formats found through non-DASH extraction; they
2285                     # contain less info and may be wrong, because we use fixed
2286                     # values (for example the resolution). See
2287                     # https://github.com/ytdl-org/youtube-dl/issues/5774 for an
2288                     # example.
2289                     formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
2290                     formats.extend(dash_formats.values())
2291
2292         # Check for malformed aspect ratio
2293         stretched_m = re.search(
2294             r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
2295             video_webpage)
2296         if stretched_m:
2297             w = float(stretched_m.group('w'))
2298             h = float(stretched_m.group('h'))
2299             # yt:stretch may hold invalid ratio data (e.g. for Q39EVAstoRM the ratio is 17:0).
2300             # Only process valid ratios.
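                 # e.g. a 4:3-encoded video tagged yt:stretch=16:9 gets
                 # stretched_ratio = 16 / 9, which downstream code can use to
                 # correct the aspect ratio of the downloaded video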
2301             if w > 0 and h > 0:
2302                 ratio = w / h
2303                 for f in formats:
2304                     if f.get('vcodec') != 'none':
2305                         f['stretched_ratio'] = ratio
2306
2307         self._sort_formats(formats)
2308
2309         self.mark_watched(video_id, video_info, player_response)
2310
2311         return {
2312             'id': video_id,
2313             'uploader': video_uploader,
2314             'uploader_id': video_uploader_id,
2315             'uploader_url': video_uploader_url,
2316             'channel_id': channel_id,
2317             'channel_url': channel_url,
2318             'upload_date': upload_date,
2319             'license': video_license,
2320             'creator': video_creator or artist,
2321             'title': video_title,
2322             'alt_title': video_alt_title or track,
2323             'thumbnail': video_thumbnail,
2324             'description': video_description,
2325             'categories': video_categories,
2326             'tags': video_tags,
2327             'subtitles': video_subtitles,
2328             'automatic_captions': automatic_captions,
2329             'duration': video_duration,
2330             'age_limit': 18 if age_gate else 0,
2331             'annotations': video_annotations,
2332             'chapters': chapters,
2333             'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
2334             'view_count': view_count,
2335             'like_count': like_count,
2336             'dislike_count': dislike_count,
2337             'average_rating': float_or_none(video_info.get('avg_rating', [None])[0]),
2338             'formats': formats,
2339             'is_live': is_live,
2340             'start_time': start_time,
2341             'end_time': end_time,
2342             'series': series,
2343             'season_number': season_number,
2344             'episode_number': episode_number,
2345             'track': track,
2346             'artist': artist,
2347             'album': album,
2348             'release_date': release_date,
2349             'release_year': release_year,
2350         }
2351
2352
2353 class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
2354     IE_DESC = 'YouTube.com playlists'
2355     _VALID_URL = r"""(?x)(?:
2356                         (?:https?://)?
2357                         (?:\w+\.)?
2358                         (?:
2359                             (?:
2360                                 youtube\.com|
2361                                 invidio\.us
2362                             )
2363                             /
2364                             (?:
2365                                (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/(?:videoseries|[0-9A-Za-z_-]{11}))
2366                                \? (?:.*?[&;])*? (?:p|a|list)=
2367                             |  p/
2368                             )|
2369                             youtu\.be/[0-9A-Za-z_-]{11}\?.*?\blist=
2370                         )
2371                         (
2372                             (?:PL|LL|EC|UU|FL|RD|UL|TL|OLAK5uy_)?[0-9A-Za-z-_]{10,}
2373                             # Top tracks, whose ids can also include dots
2374                             |(?:MC)[\w\.]*
2375                         )
2376                         .*
2377                      |
2378                         (%(playlist_id)s)
2379                      )""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
2380     _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
2381     _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&amp;[^"]*?index=(?P<index>\d+)(?:[^>]+>(?P<title>[^<]+))?'
2382     IE_NAME = 'youtube:playlist'
2383     _TESTS = [{
2384         'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
2385         'info_dict': {
2386             'title': 'ytdl test PL',
2387             'id': 'PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
2388         },
2389         'playlist_count': 3,
2390     }, {
2391         'url': 'https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
2392         'info_dict': {
2393             'id': 'PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
2394             'title': 'YDL_Empty_List',
2395         },
2396         'playlist_count': 0,
2397         'skip': 'This playlist is private',
2398     }, {
2399         'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
2400         'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2401         'info_dict': {
2402             'title': '29C3: Not my department',
2403             'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2404         },
2405         'playlist_count': 95,
2406     }, {
2407         'note': 'issue #673',
2408         'url': 'PLBB231211A4F62143',
2409         'info_dict': {
2410             'title': '[OLD]Team Fortress 2 (Class-based LP)',
2411             'id': 'PLBB231211A4F62143',
2412         },
2413         'playlist_mincount': 26,
2414     }, {
2415         'note': 'Large playlist',
2416         'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
2417         'info_dict': {
2418             'title': 'Uploads from Cauchemar',
2419             'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
2420         },
2421         'playlist_mincount': 799,
2422     }, {
2423         'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
2424         'info_dict': {
2425             'title': 'YDL_safe_search',
2426             'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
2427         },
2428         'playlist_count': 2,
2429         'skip': 'This playlist is private',
2430     }, {
2431         'note': 'embedded',
2432         'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
2433         'playlist_count': 4,
2434         'info_dict': {
2435             'title': 'JODA15',
2436             'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
2437         }
2438     }, {
2439         'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
2440         'playlist_mincount': 485,
2441         'info_dict': {
2442             'title': '2017 華語最新單曲 (2/24更新)',
2443             'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
2444         }
2445     }, {
2446         'note': 'Embedded SWF player',
2447         'url': 'https://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
2448         'playlist_count': 4,
2449         'info_dict': {
2450             'title': 'JODA7',
2451             'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ',
2452         }
2453     }, {
2454         'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
2455         'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
2456         'info_dict': {
2457             'title': 'Uploads from Interstellar Movie',
2458             'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
2459         },
2460         'playlist_mincount': 21,
2461     }, {
2462         # Playlist URL that does not actually serve a playlist
2463         'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
2464         'info_dict': {
2465             'id': 'FqZTN594JQw',
2466             'ext': 'webm',
2467             'title': "Smiley's People 01 detective, Adventure Series, Action",
2468             'uploader': 'STREEM',
2469             'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
2470             'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
2471             'upload_date': '20150526',
2472             'license': 'Standard YouTube License',
2473             'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
2474             'categories': ['People & Blogs'],
2475             'tags': list,
2476             'view_count': int,
2477             'like_count': int,
2478             'dislike_count': int,
2479         },
2480         'params': {
2481             'skip_download': True,
2482         },
2483         'add_ie': [YoutubeIE.ie_key()],
2484     }, {
2485         'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
2486         'info_dict': {
2487             'id': 'yeWKywCrFtk',
2488             'ext': 'mp4',
2489             'title': 'Small Scale Baler and Braiding Rugs',
2490             'uploader': 'Backus-Page House Museum',
2491             'uploader_id': 'backuspagemuseum',
2492             'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
2493             'upload_date': '20161008',
2494             'license': 'Standard YouTube License',
2495             'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
2496             'categories': ['Nonprofits & Activism'],
2497             'tags': list,
2498             'like_count': int,
2499             'dislike_count': int,
2500         },
2501         'params': {
2502             'noplaylist': True,
2503             'skip_download': True,
2504         },
2505     }, {
2506         'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
2507         'only_matching': True,
2508     }, {
2509         'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
2510         'only_matching': True,
2511     }, {
2512         # music album playlist
2513         'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
2514         'only_matching': True,
2515     }, {
2516         'url': 'https://invidio.us/playlist?list=PLDIoUOhQQPlXr63I_vwF9GD8sAKh77dWU',
2517         'only_matching': True,
2518     }]
2519
2520     def _real_initialize(self):
2521         self._login()
2522
2523     def _extract_mix(self, playlist_id):
2524         # Mixes are generated from a single video;
2525         # the playlist id is just 'RD' + video_id
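             # (e.g. the mix seeded by video dQw4w9WgXcQ would have the playlist
             # id RDdQw4w9WgXcQ; illustrative example)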
2526         ids = []
2527         last_id = playlist_id[-11:]
2528         for n in itertools.count(1):
2529             url = 'https://youtube.com/watch?v=%s&list=%s' % (last_id, playlist_id)
2530             webpage = self._download_webpage(
2531                 url, playlist_id, 'Downloading page {0} of Youtube mix'.format(n))
2532             new_ids = orderedSet(re.findall(
2533                 r'''(?xs)data-video-username=".*?".*?
2534                            href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id),
2535                 webpage))
2536             # Fetch new pages until all the videos are repeated; it seems that
2537             # there are always 51 unique videos.
2538             new_ids = [_id for _id in new_ids if _id not in ids]
2539             if not new_ids:
2540                 break
2541             ids.extend(new_ids)
2542             last_id = ids[-1]
2543
2544         url_results = self._ids_to_results(ids)
2545
2546         search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage)
2547         title_span = (
2548             search_title('playlist-title') or
2549             search_title('title long-title') or
2550             search_title('title'))
2551         title = clean_html(title_span)
2552
2553         return self.playlist_result(url_results, playlist_id, title)
2554
2555     def _extract_playlist(self, playlist_id):
2556         url = self._TEMPLATE_URL % playlist_id
2557         page = self._download_webpage(url, playlist_id)
2558
2559         # The yt-alert-message element now has a tabindex attribute (see https://github.com/ytdl-org/youtube-dl/issues/11604)
2560         for match in re.findall(r'<div class="yt-alert-message"[^>]*>([^<]+)</div>', page):
2561             match = match.strip()
2562             # Check if the playlist exists or is private
2563             mobj = re.match(r'[^<]*(?:The|This) playlist (?P<reason>does not exist|is private)[^<]*', match)
2564             if mobj:
2565                 reason = mobj.group('reason')
2566                 message = 'This playlist %s' % reason
2567                 if 'private' in reason:
2568                     message += ', use --username or --netrc to access it'
2569                 message += '.'
2570                 raise ExtractorError(message, expected=True)
2571             elif re.match(r'[^<]*Invalid parameters[^<]*', match):
2572                 raise ExtractorError(
2573                     'Invalid parameters. Maybe URL is incorrect.',
2574                     expected=True)
2575             elif re.match(r'[^<]*Choose your language[^<]*', match):
2576                 continue
2577             else:
2578                 self.report_warning('Youtube gives an alert message: ' + match)
2579
2580         playlist_title = self._html_search_regex(
2581             r'(?s)<h1 class="pl-header-title[^"]*"[^>]*>\s*(.*?)\s*</h1>',
2582             page, 'title', default=None)
2583
2584         _UPLOADER_BASE = r'class=["\']pl-header-details[^>]+>\s*<li>\s*<a[^>]+\bhref='
2585         uploader = self._search_regex(
2586             r'%s["\']/(?:user|channel)/[^>]+>([^<]+)' % _UPLOADER_BASE,
2587             page, 'uploader', default=None)
2588         mobj = re.search(
2589             r'%s(["\'])(?P<path>/(?:user|channel)/(?P<uploader_id>.+?))\1' % _UPLOADER_BASE,
2590             page)
2591         if mobj:
2592             uploader_id = mobj.group('uploader_id')
2593             uploader_url = compat_urlparse.urljoin(url, mobj.group('path'))
2594         else:
2595             uploader_id = uploader_url = None
2596
2597         has_videos = True
2598
2599         if not playlist_title:
2600             try:
2601                 # Some playlist URLs don't actually serve a playlist (e.g.
2602                 # https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4)
2603                 next(self._entries(page, playlist_id))
2604             except StopIteration:
2605                 has_videos = False
2606
2607         playlist = self.playlist_result(
2608             self._entries(page, playlist_id), playlist_id, playlist_title)
2609         playlist.update({
2610             'uploader': uploader,
2611             'uploader_id': uploader_id,
2612             'uploader_url': uploader_url,
2613         })
2614
2615         return has_videos, playlist
2616
2617     def _check_download_just_video(self, url, playlist_id):
2618         # Check if it's a video-specific URL
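             # (e.g. /watch?v=<video_id>&list=<playlist_id> or
             # youtu.be/<video_id>?list=<playlist_id>)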
2619         query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
2620         video_id = query_dict.get('v', [None])[0] or self._search_regex(
2621             r'(?:(?:^|//)youtu\.be/|youtube\.com/embed/(?!videoseries))([0-9A-Za-z_-]{11})', url,
2622             'video id', default=None)
2623         if video_id:
2624             if self._downloader.params.get('noplaylist'):
2625                 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
2626                 return video_id, self.url_result(video_id, 'Youtube', video_id=video_id)
2627             else:
2628                 self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
2629                 return video_id, None
2630         return None, None
2631
2632     def _real_extract(self, url):
2633         # Extract playlist id
2634         mobj = re.match(self._VALID_URL, url)
2635         if mobj is None:
2636             raise ExtractorError('Invalid URL: %s' % url)
2637         playlist_id = mobj.group(1) or mobj.group(2)
2638
2639         video_id, video = self._check_download_just_video(url, playlist_id)
2640         if video:
2641             return video
2642
2643         if playlist_id.startswith(('RD', 'UL', 'PU')):
2644             # Mixes require a custom extraction process
2645             return self._extract_mix(playlist_id)
2646
2647         has_videos, playlist = self._extract_playlist(playlist_id)
2648         if has_videos or not video_id:
2649             return playlist
2650
2651         # Some playlist URLs don't actually serve a playlist (see
2652         # https://github.com/ytdl-org/youtube-dl/issues/10537).
2653         # Fall back to plain video extraction if there is a video id
2654         # along with the playlist id.
2655         return self.url_result(video_id, 'Youtube', video_id=video_id)
2656
2657
2658 class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
2659     IE_DESC = 'YouTube.com channels'
2660     _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com|(?:www\.)?invidio\.us)/channel/(?P<id>[0-9A-Za-z_-]+)'
2661     _TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos'
2662     _VIDEO_RE = r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?'
2663     IE_NAME = 'youtube:channel'
2664     _TESTS = [{
2665         'note': 'paginated channel',
2666         'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
2667         'playlist_mincount': 91,
2668         'info_dict': {
2669             'id': 'UUKfVa3S1e4PHvxWcwyMMg8w',
2670             'title': 'Uploads from lex will',
2671         }
2672     }, {
2673         'note': 'Age restricted channel',
2674         # from https://www.youtube.com/user/DeusExOfficial
2675         'url': 'https://www.youtube.com/channel/UCs0ifCMCm1icqRbqhUINa0w',
2676         'playlist_mincount': 64,
2677         'info_dict': {
2678             'id': 'UUs0ifCMCm1icqRbqhUINa0w',
2679             'title': 'Uploads from Deus Ex',
2680         },
2681     }, {
2682         'url': 'https://invidio.us/channel/UC23qupoDRn9YOAVzeoxjOQA',
2683         'only_matching': True,
2684     }]
2685
2686     @classmethod
2687     def suitable(cls, url):
2688         return (False if YoutubePlaylistsIE.suitable(url) or YoutubeLiveIE.suitable(url)
2689                 else super(YoutubeChannelIE, cls).suitable(url))
2690
2691     def _build_template_url(self, url, channel_id):
2692         return self._TEMPLATE_URL % channel_id
2693
2694     def _real_extract(self, url):
2695         channel_id = self._match_id(url)
2696
2697         url = self._build_template_url(url, channel_id)
2698
2699         # Page-by-page channel listing is restricted to 35 pages of 30 items each, i.e. 1050 videos total (see #5778).
2700         # Work around this by extracting as a playlist if we manage to obtain the channel playlist URL,
2701         # otherwise fall back on page-by-page channel extraction.
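             # (the uploads playlist id is the channel id with its 'UC' prefix
             # replaced by 'UU', e.g. UCKfVa3S1e4PHvxWcwyMMg8w ->
             # UUKfVa3S1e4PHvxWcwyMMg8w, as in the paginated channel test above)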
2702         channel_page = self._download_webpage(
2703             url + '?view=57', channel_id,
2704             'Downloading channel page', fatal=False)
2705         if channel_page is False:
2706             channel_playlist_id = False
2707         else:
2708             channel_playlist_id = self._html_search_meta(
2709                 'channelId', channel_page, 'channel id', default=None)
2710             if not channel_playlist_id:
2711                 channel_url = self._html_search_meta(
2712                     ('al:ios:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad'),
2713                     channel_page, 'channel url', default=None)
2714                 if channel_url:
2715                     channel_playlist_id = self._search_regex(
2716                         r'vnd\.youtube://user/([0-9A-Za-z_-]+)',
2717                         channel_url, 'channel id', default=None)
2718         if channel_playlist_id and channel_playlist_id.startswith('UC'):
2719             playlist_id = 'UU' + channel_playlist_id[2:]
2720             return self.url_result(
2721                 compat_urlparse.urljoin(url, '/playlist?list=%s' % playlist_id), 'YoutubePlaylist')
2722
2723         channel_page = self._download_webpage(url, channel_id, 'Downloading page #1')
2724         autogenerated = re.search(r'''(?x)
2725                 class="[^"]*?(?:
2726                     channel-header-autogenerated-label|
2727                     yt-channel-title-autogenerated
2728                 )[^"]*"''', channel_page) is not None
2729
2730         if autogenerated:
2731             # The videos are contained in a single page;
2732             # the AJAX pages can't be used because they are empty
2733             entries = [
2734                 self.url_result(
2735                     video_id, 'Youtube', video_id=video_id,
2736                     video_title=video_title)
2737                 for video_id, video_title in self.extract_videos_from_page(channel_page)]
2738             return self.playlist_result(entries, channel_id)
2739
2740         try:
2741             next(self._entries(channel_page, channel_id))
2742         except StopIteration:
2743             alert_message = self._html_search_regex(
2744                 r'(?s)<div[^>]+class=(["\']).*?\byt-alert-message\b.*?\1[^>]*>(?P<alert>[^<]+)</div>',
2745                 channel_page, 'alert', default=None, group='alert')
2746             if alert_message:
2747                 raise ExtractorError('Youtube said: %s' % alert_message, expected=True)
2748
2749         return self.playlist_result(self._entries(channel_page, channel_id), channel_id)
2750
2751
2752 class YoutubeUserIE(YoutubeChannelIE):
2753     IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
2754     _VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:(?P<user>user|c)/)?(?!(?:attribution_link|watch|results|shared)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
2755     _TEMPLATE_URL = 'https://www.youtube.com/%s/%s/videos'
2756     IE_NAME = 'youtube:user'
2757
2758     _TESTS = [{
2759         'url': 'https://www.youtube.com/user/TheLinuxFoundation',
2760         'playlist_mincount': 320,
2761         'info_dict': {
2762             'id': 'UUfX55Sx5hEFjoC3cNs6mCUQ',
2763             'title': 'Uploads from The Linux Foundation',
2764         }
2765     }, {
2766         # Only available via https://www.youtube.com/c/12minuteathlete/videos
2767         # but not https://www.youtube.com/user/12minuteathlete/videos
2768         'url': 'https://www.youtube.com/c/12minuteathlete/videos',
2769         'playlist_mincount': 249,
2770         'info_dict': {
2771             'id': 'UUVjM-zV6_opMDx7WYxnjZiQ',
2772             'title': 'Uploads from 12 Minute Athlete',
2773         }
2774     }, {
2775         'url': 'ytuser:phihag',
2776         'only_matching': True,
2777     }, {
2778         'url': 'https://www.youtube.com/c/gametrailers',
2779         'only_matching': True,
2780     }, {
2781         'url': 'https://www.youtube.com/gametrailers',
2782         'only_matching': True,
2783     }, {
2784         # This channel is not available, geo restricted to JP
2785         'url': 'https://www.youtube.com/user/kananishinoSMEJ/videos',
2786         'only_matching': True,
2787     }]
2788
2789     @classmethod
2790     def suitable(cls, url):
2791         # Don't return True if the URL can be extracted with another YouTube
2792         # extractor; the regex is too permissive and would match otherwise.
2793         other_yt_ies = iter(klass for (name, klass) in globals().items() if name.startswith('Youtube') and name.endswith('IE') and klass is not cls)
2794         if any(ie.suitable(url) for ie in other_yt_ies):
2795             return False
2796         else:
2797             return super(YoutubeUserIE, cls).suitable(url)
2798
2799     def _build_template_url(self, url, channel_id):
2800         mobj = re.match(self._VALID_URL, url)
2801         return self._TEMPLATE_URL % (mobj.group('user') or 'user', mobj.group('id'))
2802
2803
2804 class YoutubeLiveIE(YoutubeBaseInfoExtractor):
2805     IE_DESC = 'YouTube.com live streams'
2806     _VALID_URL = r'(?P<base_url>https?://(?:\w+\.)?youtube\.com/(?:(?:user|channel|c)/)?(?P<id>[^/]+))/live'
2807     IE_NAME = 'youtube:live'
2808
2809     _TESTS = [{
2810         'url': 'https://www.youtube.com/user/TheYoungTurks/live',
2811         'info_dict': {
2812             'id': 'a48o2S1cPoo',
2813             'ext': 'mp4',
2814             'title': 'The Young Turks - Live Main Show',
2815             'uploader': 'The Young Turks',
2816             'uploader_id': 'TheYoungTurks',
2817             'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
2818             'upload_date': '20150715',
2819             'license': 'Standard YouTube License',
2820             'description': 'md5:438179573adcdff3c97ebb1ee632b891',
2821             'categories': ['News & Politics'],
2822             'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
2823             'like_count': int,
2824             'dislike_count': int,
2825         },
2826         'params': {
2827             'skip_download': True,
2828         },
2829     }, {
2830         'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
2831         'only_matching': True,
2832     }, {
2833         'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
2834         'only_matching': True,
2835     }, {
2836         'url': 'https://www.youtube.com/TheYoungTurks/live',
2837         'only_matching': True,
2838     }]
2839
2840     def _real_extract(self, url):
2841         mobj = re.match(self._VALID_URL, url)
2842         channel_id = mobj.group('id')
2843         base_url = mobj.group('base_url')
2844         webpage = self._download_webpage(url, channel_id, fatal=False)
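             # If the /live page embeds a concrete video (og:type video.* and a
             # valid 11-character videoId meta tag), extract that video directly;
             # otherwise fall back to the channel/user base URL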
2845         if webpage:
2846             page_type = self._og_search_property(
2847                 'type', webpage, 'page type', default='')
2848             video_id = self._html_search_meta(
2849                 'videoId', webpage, 'video id', default=None)
2850             if page_type.startswith('video') and video_id and re.match(
2851                     r'^[0-9A-Za-z_-]{11}$', video_id):
2852                 return self.url_result(video_id, YoutubeIE.ie_key())
2853         return self.url_result(base_url)
2854
2855
2856 class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
2857     IE_DESC = 'YouTube.com user/channel playlists'
2858     _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/(?:user|channel)/(?P<id>[^/]+)/playlists'
2859     IE_NAME = 'youtube:playlists'
2860
2861     _TESTS = [{
2862         'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
2863         'playlist_mincount': 4,
2864         'info_dict': {
2865             'id': 'ThirstForScience',
2866             'title': 'Thirst for Science',
2867         },
2868     }, {
2869         # with "Load more" button
2870         'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
2871         'playlist_mincount': 70,
2872         'info_dict': {
2873             'id': 'igorkle1',
2874             'title': 'Игорь Клейнер',
2875         },
2876     }, {
2877         'url': 'https://www.youtube.com/channel/UCiU1dHvZObB2iP6xkJ__Icw/playlists',
2878         'playlist_mincount': 17,
2879         'info_dict': {
2880             'id': 'UCiU1dHvZObB2iP6xkJ__Icw',
2881             'title': 'Chem Player',
2882         },
2883     }]
2884
2885
2886 class YoutubeSearchBaseInfoExtractor(YoutubePlaylistBaseInfoExtractor):
2887     _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})(?:[^"]*"[^>]+\btitle="(?P<title>[^"]+))?'
2888
2889
2890 class YoutubeSearchIE(SearchInfoExtractor, YoutubeSearchBaseInfoExtractor):
2891     IE_DESC = 'YouTube.com searches'
2892     # There doesn't appear to be a real limit; for example, searching for
2893     # 'python' yields more than 8,000,000 results
2894     _MAX_RESULTS = float('inf')
2895     IE_NAME = 'youtube:search'
2896     _SEARCH_KEY = 'ytsearch'
2897     _EXTRA_QUERY_ARGS = {}
2898     _TESTS = []
2899
2900     def _get_n_results(self, query, n):
2901         """Get a specified number of results for a query"""
2902
2903         videos = []
2904         limit = n
2905
2906         url_query = {
2907             'search_query': query.encode('utf-8'),
2908         }
2909         url_query.update(self._EXTRA_QUERY_ARGS)
2910         result_url = 'https://www.youtube.com/results?' + compat_urllib_parse_urlencode(url_query)
2911
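             # With spf=navigate YouTube answers with a JSON array whose second
             # element carries the rendered results HTML under ['body']['content']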
2912         for pagenum in itertools.count(1):
2913             data = self._download_json(
2914                 result_url, video_id='query "%s"' % query,
2915                 note='Downloading page %s' % pagenum,
2916                 errnote='Unable to download API page',
2917                 query={'spf': 'navigate'})
2918             html_content = data[1]['body']['content']
2919
2920             if 'class="search-message' in html_content:
2921                 raise ExtractorError(
2922                     '[youtube] No video results', expected=True)
2923
2924             new_videos = list(self._process_page(html_content))
2925             videos += new_videos
2926             if not new_videos or len(videos) > limit:
2927                 break
2928             next_link = self._html_search_regex(
2929                 r'href="(/results\?[^"]*\bsp=[^"]+)"[^>]*>\s*<span[^>]+class="[^"]*\byt-uix-button-content\b[^"]*"[^>]*>Next',
2930                 html_content, 'next link', default=None)
2931             if next_link is None:
2932                 break
2933             result_url = compat_urlparse.urljoin('https://www.youtube.com/', next_link)
2934
2935         if len(videos) > n:
2936             videos = videos[:n]
2937         return self.playlist_result(videos, query)
2938
2939
2940 class YoutubeSearchDateIE(YoutubeSearchIE):
2941     IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
2942     _SEARCH_KEY = 'ytsearchdate'
2943     IE_DESC = 'YouTube.com searches, newest videos first'
2944     _EXTRA_QUERY_ARGS = {'search_sort': 'video_date_uploaded'}
2945
2946
2947 class YoutubeSearchURLIE(YoutubeSearchBaseInfoExtractor):
2948     IE_DESC = 'YouTube.com search URLs'
2949     IE_NAME = 'youtube:search_url'
2950     _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)'
2951     _TESTS = [{
2952         'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
2953         'playlist_mincount': 5,
2954         'info_dict': {
2955             'title': 'youtube-dl test video',
2956         }
2957     }, {
2958         'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
2959         'only_matching': True,
2960     }]
2961
2962     def _real_extract(self, url):
2963         mobj = re.match(self._VALID_URL, url)
2964         query = compat_urllib_parse_unquote_plus(mobj.group('query'))
2965         webpage = self._download_webpage(url, query)
2966         return self.playlist_result(self._process_page(webpage), playlist_title=query)
2967
2968
2969 class YoutubeShowIE(YoutubePlaylistsBaseInfoExtractor):
2970     IE_DESC = 'YouTube.com (multi-season) shows'
2971     _VALID_URL = r'https?://(?:www\.)?youtube\.com/show/(?P<id>[^?#]*)'
2972     IE_NAME = 'youtube:show'
2973     _TESTS = [{
2974         'url': 'https://www.youtube.com/show/airdisasters',
2975         'playlist_mincount': 5,
2976         'info_dict': {
2977             'id': 'airdisasters',
2978             'title': 'Air Disasters',
2979         }
2980     }]
2981
2982     def _real_extract(self, url):
2983         playlist_id = self._match_id(url)
2984         return super(YoutubeShowIE, self)._real_extract(
2985             'https://www.youtube.com/show/%s/playlists' % playlist_id)
2986
2987
2988 class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
2989     """
2990     Base class for feed extractors
2991     Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
2992     """
2993     _LOGIN_REQUIRED = True
2994
2995     @property
2996     def IE_NAME(self):
2997         return 'youtube:%s' % self._FEED_NAME
2998
2999     def _real_initialize(self):
3000         self._login()
3001
3002     def _entries(self, page):
3003         # The extraction process is the same as for playlists, but the regex
3004         # for the video ids doesn't contain an index
3005         ids = []
3006         more_widget_html = content_html = page
3007         for page_num in itertools.count(1):
3008             matches = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)
3009
3010             # The 'recommended' feed has an infinite 'load more' and each new portion
3011             # serves the same videos in a (sometimes) slightly different order, so we
3012             # check for uniqueness and break when a portion has no new videos
3013             new_ids = list(filter(lambda video_id: video_id not in ids, orderedSet(matches)))
3014             if not new_ids:
3015                 break
3016
3017             ids.extend(new_ids)
3018
3019             for entry in self._ids_to_results(new_ids):
3020                 yield entry
3021
3022             mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
3023             if not mobj:
3024                 break
3025
3026             more = self._download_json(
3027                 'https://youtube.com/%s' % mobj.group('more'), self._PLAYLIST_TITLE,
3028                 'Downloading page #%s' % page_num,
3029                 transform_source=uppercase_escape)
3030             content_html = more['content_html']
3031             more_widget_html = more['load_more_widget_html']
3032
3033     def _real_extract(self, url):
3034         page = self._download_webpage(
3035             'https://www.youtube.com/feed/%s' % self._FEED_NAME,
3036             self._PLAYLIST_TITLE)
3037         return self.playlist_result(
3038             self._entries(page), playlist_title=self._PLAYLIST_TITLE)
3039
3040
3041 class YoutubeWatchLaterIE(YoutubePlaylistIE):
3042     IE_NAME = 'youtube:watchlater'
3043     IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
3044     _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:feed/watch_later|(?:playlist|watch)\?(?:.+&)?list=WL)|:ytwatchlater'
3045
3046     _TESTS = [{
3047         'url': 'https://www.youtube.com/playlist?list=WL',
3048         'only_matching': True,
3049     }, {
3050         'url': 'https://www.youtube.com/watch?v=bCNU9TrbiRk&index=1&list=WL',
3051         'only_matching': True,
3052     }]
3053
3054     def _real_extract(self, url):
3055         _, video = self._check_download_just_video(url, 'WL')
3056         if video:
3057             return video
3058         _, playlist = self._extract_playlist('WL')
3059         return playlist
3060
3061
3062 class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
3063     IE_NAME = 'youtube:favorites'
3064     IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
3065     _VALID_URL = r'https?://(?:www\.)?youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
3066     _LOGIN_REQUIRED = True
3067
3068     def _real_extract(self, url):
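             # The favourites feed is backed by a regular playlist; grab its id
             # from the page and hand it over to the playlist extractor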
3069         webpage = self._download_webpage('https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
3070         playlist_id = self._search_regex(r'list=(.+?)["&]', webpage, 'favourites playlist id')
3071         return self.url_result(playlist_id, 'YoutubePlaylist')
3072
3073
3074 class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
3075     IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
3076     _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/recommended|:ytrec(?:ommended)?'
3077     _FEED_NAME = 'recommended'
3078     _PLAYLIST_TITLE = 'Youtube Recommended videos'
3079
3080
3081 class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
3082     IE_DESC = 'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
3083     _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
3084     _FEED_NAME = 'subscriptions'
3085     _PLAYLIST_TITLE = 'Youtube Subscriptions'
3086
3087
3088 class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
3089     IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
3090     _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/history|:ythistory'
3091     _FEED_NAME = 'history'
3092     _PLAYLIST_TITLE = 'Youtube History'
3093
3094
3095 class YoutubeTruncatedURLIE(InfoExtractor):
3096     IE_NAME = 'youtube:truncated_url'
3097     IE_DESC = False  # Do not list
3098     _VALID_URL = r'''(?x)
3099         (?:https?://)?
3100         (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
3101         (?:watch\?(?:
3102             feature=[a-z_]+|
3103             annotation_id=annotation_[^&]+|
3104             x-yt-cl=[0-9]+|
3105             hl=[^&]*|
3106             t=[0-9]+
3107         )?
3108         |
3109             attribution_link\?a=[^&]+
3110         )
3111         $
3112     '''
3113
3114     _TESTS = [{
3115         'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
3116         'only_matching': True,
3117     }, {
3118         'url': 'https://www.youtube.com/watch?',
3119         'only_matching': True,
3120     }, {
3121         'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
3122         'only_matching': True,
3123     }, {
3124         'url': 'https://www.youtube.com/watch?feature=foo',
3125         'only_matching': True,
3126     }, {
3127         'url': 'https://www.youtube.com/watch?hl=en-GB',
3128         'only_matching': True,
3129     }, {
3130         'url': 'https://www.youtube.com/watch?t=2372',
3131         'only_matching': True,
3132     }]
3133
3134     def _real_extract(self, url):
3135         raise ExtractorError(
3136             'Did you forget to quote the URL? Remember that & is a meta '
3137             'character in most shells, so you want to put the URL in quotes, '
3138             'like  youtube-dl '
3139             '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
3140             ' or simply  youtube-dl BaW_jenozKc  .',
3141             expected=True)
3142
3143
3144 class YoutubeTruncatedIDIE(InfoExtractor):
3145     IE_NAME = 'youtube:truncated_id'
3146     IE_DESC = False  # Do not list
3147     _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'
3148
3149     _TESTS = [{
3150         'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
3151         'only_matching': True,
3152     }]
3153
3154     def _real_extract(self, url):
3155         video_id = self._match_id(url)
3156         raise ExtractorError(
3157             'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url),
3158             expected=True)