git.bitcoin.ninja Git - youtube-dl/blob - youtube_dl/extractor/youtube.py

   1 # coding: utf-8
   2
   3 from __future__ import unicode_literals
   4
   5
   6 import itertools
   7 import json
   8 import os.path
   9 import random
  10 import re
  11 import time
  12 import traceback
  13
  14 from .common import InfoExtractor, SearchInfoExtractor
  15 from ..jsinterp import JSInterpreter
  16 from ..swfinterp import SWFInterpreter
  17 from ..compat import (
  18     compat_chr,
  19     compat_kwargs,
  20     compat_parse_qs,
  21     compat_urllib_parse_unquote,
  22     compat_urllib_parse_unquote_plus,
  23     compat_urllib_parse_urlencode,
  24     compat_urllib_parse_urlparse,
  25     compat_urlparse,
  26     compat_str,
  27 )
  28 from ..utils import (
  29     clean_html,
  30     error_to_compat_str,
  31     ExtractorError,
  32     float_or_none,
  33     get_element_by_attribute,
  34     get_element_by_id,
  35     int_or_none,
  36     mimetype2ext,
  37     orderedSet,
  38     parse_codecs,
  39     parse_duration,
  40     qualities,
  41     remove_quotes,
  42     remove_start,
  43     smuggle_url,
  44     str_or_none,
  45     str_to_int,
  46     try_get,
  47     unescapeHTML,
  48     unified_strdate,
  49     unsmuggle_url,
  50     uppercase_escape,
  51     url_or_none,
  52     urlencode_postdata,
  53 )
  54
  55
  56 class YoutubeBaseInfoExtractor(InfoExtractor):
  57     """Provide base functions for Youtube extractors"""
  58     _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
  59     _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'
  60
  61     _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
  62     _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
  63     _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'
  64
  65     _NETRC_MACHINE = 'youtube'
  66     # If True it will raise an error if no login info is provided
  67     _LOGIN_REQUIRED = False
  68
  69     _PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL|OLAK5uy_)[0-9A-Za-z-_]{10,}'
  70
  71     def _set_language(self):
  72         self._set_cookie(
  73             '.youtube.com', 'PREF', 'f1=50000000&hl=en',
  74             # YouTube sets the expire time to about two months
  75             expire_time=time.time() + 2 * 30 * 24 * 3600)
  76
  77     def _ids_to_results(self, ids):
  78         return [
  79             self.url_result(vid_id, 'Youtube', video_id=vid_id)
  80             for vid_id in ids]
  81
  82     def _login(self):
  83         """
  84         Attempt to log in to YouTube.
  85         True is returned if successful or skipped.
  86         False is returned if login failed.
  87
  88         If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
  89         """
  90         username, password = self._get_login_info()
  91         # No authentication to be performed
  92         if username is None:
  93             if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
  94                 raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
  95             return True
  96
  97         login_page = self._download_webpage(
  98             self._LOGIN_URL, None,
  99             note='Downloading login page',
 100             errnote='unable to fetch login page', fatal=False)
 101         if login_page is False:
 102             return
 103
 104         login_form = self._hidden_inputs(login_page)
 105
 106         def req(url, f_req, note, errnote):
 107             data = login_form.copy()
 108             data.update({
 109                 'pstMsg': 1,
 110                 'checkConnection': 'youtube',
 111                 'checkedDomains': 'youtube',
 112                 'hl': 'en',
 113                 'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
 114                 'f.req': json.dumps(f_req),
 115                 'flowName': 'GlifWebSignIn',
 116                 'flowEntry': 'ServiceLogin',
 117             })
 118             return self._download_json(
 119                 url, None, note=note, errnote=errnote,
 120                 transform_source=lambda s: re.sub(r'^[^[]*', '', s),
 121                 fatal=False,
 122                 data=urlencode_postdata(data), headers={
 123                     'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
 124                     'Google-Accounts-XSRF': 1,
 125                 })
 126
 127         def warn(message):
 128             self._downloader.report_warning(message)
 129
 130         lookup_req = [
 131             username,
 132             None, [], None, 'US', None, None, 2, False, True,
 133             [
 134                 None, None,
 135                 [2, 1, None, 1,
 136                  'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
 137                  None, [], 4],
 138                 1, [None, None, []], None, None, None, True
 139             ],
 140             username,
 141         ]
 142
 143         lookup_results = req(
 144             self._LOOKUP_URL, lookup_req,
 145             'Looking up account info', 'Unable to look up account info')
 146
 147         if lookup_results is False:
 148             return False
 149
 150         user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
 151         if not user_hash:
 152             warn('Unable to extract user hash')
 153             return False
 154
 155         challenge_req = [
 156             user_hash,
 157             None, 1, None, [1, None, None, None, [password, None, True]],
 158             [
 159                 None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
 160                 1, [None, None, []], None, None, None, True
 161             ]]
 162
 163         challenge_results = req(
 164             self._CHALLENGE_URL, challenge_req,
 165             'Logging in', 'Unable to log in')
 166
 167         if challenge_results is False:
 168             return
 169
 170         login_res = try_get(challenge_results, lambda x: x[0][5], list)
 171         if login_res:
 172             login_msg = try_get(login_res, lambda x: x[5], compat_str)
 173             warn(
 174                 'Unable to login: %s' % 'Invalid password'
 175                 if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
 176             return False
 177
 178         res = try_get(challenge_results, lambda x: x[0][-1], list)
 179         if not res:
 180             warn('Unable to extract result entry')
 181             return False
 182
 183         login_challenge = try_get(res, lambda x: x[0][0], list)
 184         if login_challenge:
 185             challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
 186             if challenge_str == 'TWO_STEP_VERIFICATION':
 187                 # SEND_SUCCESS - TFA code has been successfully sent to phone
 188                 # QUOTA_EXCEEDED - reached the limit of TFA codes
 189                 status = try_get(login_challenge, lambda x: x[5], compat_str)
 190                 if status == 'QUOTA_EXCEEDED':
 191                     warn('Exceeded the limit of TFA codes, try later')
 192                     return False
 193
 194                 tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
 195                 if not tl:
 196                     warn('Unable to extract TL')
 197                     return False
 198
 199                 tfa_code = self._get_tfa_info('2-step verification code')
 200
 201                 if not tfa_code:
 202                     warn(
 203                         'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
 204                         '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
 205                     return False
 206
 207                 tfa_code = remove_start(tfa_code, 'G-')
 208
 209                 tfa_req = [
 210                     user_hash, None, 2, None,
 211                     [
 212                         9, None, None, None, None, None, None, None,
 213                         [None, tfa_code, True, 2]
 214                     ]]
 215
 216                 tfa_results = req(
 217                     self._TFA_URL.format(tl), tfa_req,
 218                     'Submitting TFA code', 'Unable to submit TFA code')
 219
 220                 if tfa_results is False:
 221                     return False
 222
 223                 tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
 224                 if tfa_res:
 225                     tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
 226                     warn(
 227                         'Unable to finish TFA: %s' % 'Invalid TFA code'
 228                         if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
 229                     return False
 230
 231                 check_cookie_url = try_get(
 232                     tfa_results, lambda x: x[0][-1][2], compat_str)
 233             else:
 234                 CHALLENGES = {
 235                     'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
 236                     'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
 237                     'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
 238                 }
 239                 challenge = CHALLENGES.get(
 240                     challenge_str,
 241                     '%s returned error %s.' % (self.IE_NAME, challenge_str))
 242                 warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
 243                 return False
 244         else:
 245             check_cookie_url = try_get(res, lambda x: x[2], compat_str)
 246
 247         if not check_cookie_url:
 248             warn('Unable to extract CheckCookie URL')
 249             return False
 250
 251         check_cookie_results = self._download_webpage(
 252             check_cookie_url, None, 'Checking cookie', fatal=False)
 253
 254         if check_cookie_results is False:
 255             return False
 256
 257         if 'https://myaccount.google.com/' not in check_cookie_results:
 258             warn('Unable to log in')
 259             return False
 260
 261         return True
 262
 263     def _download_webpage_handle(self, *args, **kwargs):
 264         query = kwargs.get('query', {}).copy()
 265         query['disable_polymer'] = 'true'
 266         kwargs['query'] = query
 267         return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
 268             *args, **compat_kwargs(kwargs))
 269
 270     def _real_initialize(self):
 271         if self._downloader is None:
 272             return
 273         self._set_language()
 274         if not self._login():
 275             return
 276
 277
 278 class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
 279     # Extract entries from page with "Load more" button
 280     def _entries(self, page, playlist_id):
 281         more_widget_html = content_html = page
 282         for page_num in itertools.count(1):
 283             for entry in self._process_page(content_html):
 284                 yield entry
 285
 286             mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
 287             if not mobj:
 288                 break
 289
 290             more = self._download_json(
 291                 'https://youtube.com/%s' % mobj.group('more'), playlist_id,
 292                 'Downloading page #%s' % page_num,
 293                 transform_source=uppercase_escape)
 294             content_html = more['content_html']
 295             if not content_html.strip():
 296                 # Some webpages show a "Load more" button but they don't
 297                 # have more videos
 298                 break
 299             more_widget_html = more['load_more_widget_html']
 300
 301
 302 class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
 303     def _process_page(self, content):
 304         for video_id, video_title in self.extract_videos_from_page(content):
 305             yield self.url_result(video_id, 'Youtube', video_id, video_title)
 306
 307     def extract_videos_from_page(self, page):
 308         ids_in_page = []
 309         titles_in_page = []
 310         for mobj in re.finditer(self._VIDEO_RE, page):
 311             # The link with index 0 is not the first video of the playlist (not sure if still actual)
 312             if 'index' in mobj.groupdict() and mobj.group('id') == '0':
 313                 continue
 314             video_id = mobj.group('id')
 315             video_title = unescapeHTML(mobj.group('title'))
 316             if video_title:
 317                 video_title = video_title.strip()
 318             try:
 319                 idx = ids_in_page.index(video_id)
 320                 if video_title and not titles_in_page[idx]:
 321                     titles_in_page[idx] = video_title
 322             except ValueError:
 323                 ids_in_page.append(video_id)
 324                 titles_in_page.append(video_title)
 325         return zip(ids_in_page, titles_in_page)
 326
 327
 328 class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
 329     def _process_page(self, content):
 330         for playlist_id in orderedSet(re.findall(
 331                 r'<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*><a[^>]+href="/?playlist\?list=([0-9A-Za-z-_]{10,})"',
 332                 content)):
 333             yield self.url_result(
 334                 'https://www.youtube.com/playlist?list=%s' % playlist_id, 'YoutubePlaylist')
 335
 336     def _real_extract(self, url):
 337         playlist_id = self._match_id(url)
 338         webpage = self._download_webpage(url, playlist_id)
 339         title = self._og_search_title(webpage, fatal=False)
 340         return self.playlist_result(self._entries(webpage, playlist_id), playlist_id, title)
 341
 342
 343 class YoutubeIE(YoutubeBaseInfoExtractor):
 344     IE_DESC = 'YouTube.com'
 345     _VALID_URL = r"""(?x)^
 346                      (
 347                          (?:https?://|//)                                    # http(s):// or protocol-independent URL
 348                          (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|
 349                             (?:www\.)?deturl\.com/www\.youtube\.com/|
 350                             (?:www\.)?pwnyoutube\.com/|
 351                             (?:www\.)?hooktube\.com/|
 352                             (?:www\.)?yourepeat\.com/|
 353                             tube\.majestyc\.net/|
 354                             (?:(?:www|dev)\.)?invidio\.us/|
 355                             (?:www\.)?invidiou\.sh/|
 356                             (?:www\.)?invidious\.snopyta\.org/|
 357                             (?:www\.)?invidious\.kabi\.tk/|
 358                             (?:www\.)?vid\.wxzm\.sx/|
 359                             youtube\.googleapis\.com/)                        # the various hostnames, with wildcard subdomains
 360                          (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
 361                          (?:                                                  # the various things that can precede the ID:
 362                              (?:(?:v|embed|e)/(?!videoseries))                # v/ or embed/ or e/
 363                              |(?:                                             # or the v= param in all its forms
 364                                  (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)?  # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
 365                                  (?:\?|\#!?)                                  # the params delimiter ? or # or #!
 366                                  (?:.*?[&;])??                                # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
 367                                  v=
 368                              )
 369                          ))
 370                          |(?:
 371                             youtu\.be|                                        # just youtu.be/xxxx
 372                             vid\.plus|                                        # or vid.plus/xxxx
 373                             zwearz\.com/watch|                                # or zwearz.com/watch/xxxx
 374                          )/
 375                          |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
 376                          )
 377                      )?                                                       # all until now is optional -> you can pass the naked ID
 378                      ([0-9A-Za-z_-]{11})                                      # here is it! the YouTube video ID
 379                      (?!.*?\blist=
 380                         (?:
 381                             %(playlist_id)s|                                  # combined list/video URLs are handled by the playlist IE
 382                             WL                                                # WL are handled by the watch later IE
 383                         )
 384                      )
 385                      (?(1).+)?                                                # if we found the ID, everything can follow
 386                      $""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
 387     _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
 388     _formats = {
 389         '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
 390         '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
 391         '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
 392         '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
 393         '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
 394         '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
 395         '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 396         '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 397         # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
 398         '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
 399         '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
 400         '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
 401         '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
 402         '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
 403         '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
 404         '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
 405         '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 406         '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 407
 408
 409         # 3D videos
 410         '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
 411         '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
 412         '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
 413         '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
 414         '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
 415         '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
 416         '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
 417
 418         # Apple HTTP Live Streaming
 419         '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
 420         '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
 421         '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
 422         '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
 423         '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
 424         '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
 425         '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
 426         '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
 427
 428         # DASH mp4 video
 429         '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
 430         '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
 431         '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
 432         '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
 433         '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
 434         '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'},  # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
 435         '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
 436         '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
 437         '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
 438         '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
 439         '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
 440         '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
 441
 442         # Dash mp4 audio
 443         '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
 444         '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
 445         '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
 446         '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
 447         '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
 448         '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
 449         '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
 450
 451         # Dash webm
 452         '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 453         '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 454         '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 455         '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 456         '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 457         '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 458         '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
 459         '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 460         '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 461         '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 462         '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 463         '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 464         '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 465         '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 466         '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 467         # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
 468         '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 469         '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
 470         '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
 471         '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
 472         '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 473         '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
 474
 475         # Dash webm audio
 476         '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
 477         '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
 478
 479         # Dash webm audio with opus inside
 480         '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
 481         '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
 482         '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
 483
 484         # RTMP (unnamed)
 485         '_rtmp': {'protocol': 'rtmp'},
 486     }
 487     _SUBTITLE_FORMATS = ('srv1', 'srv2', 'srv3', 'ttml', 'vtt')
 488
 489     _GEO_BYPASS = False
 490
 491     IE_NAME = 'youtube'
 492     _TESTS = [
 493         {
 494             'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
 495             'info_dict': {
 496                 'id': 'BaW_jenozKc',
 497                 'ext': 'mp4',
 498                 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
 499                 'uploader': 'Philipp Hagemeister',
 500                 'uploader_id': 'phihag',
 501                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
 502                 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
 503                 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
 504                 'upload_date': '20121002',
 505                 'description': 'test chars:  "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
 506                 'categories': ['Science & Technology'],
 507                 'tags': ['youtube-dl'],
 508                 'duration': 10,
 509                 'view_count': int,
 510                 'like_count': int,
 511                 'dislike_count': int,
 512                 'start_time': 1,
 513                 'end_time': 9,
 514             }
 515         },
 516         {
 517             'url': 'https://www.youtube.com/watch?v=UxxajLWwzqY',
 518             'note': 'Test generic use_cipher_signature video (#897)',
 519             'info_dict': {
 520                 'id': 'UxxajLWwzqY',
 521                 'ext': 'mp4',
 522                 'upload_date': '20120506',
 523                 'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
 524                 'alt_title': 'I Love It (feat. Charli XCX)',
 525                 'description': 'md5:f3ceb5ef83a08d95b9d146f973157cc8',
 526                 'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
 527                          'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
 528                          'iconic ep', 'iconic', 'love', 'it'],
 529                 'duration': 180,
 530                 'uploader': 'Icona Pop',
 531                 'uploader_id': 'IconaPop',
 532                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IconaPop',
 533                 'creator': 'Icona Pop',
 534                 'track': 'I Love It (feat. Charli XCX)',
 535                 'artist': 'Icona Pop',
 536             }
 537         },
 538         {
 539             'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ',
 540             'note': 'Test VEVO video with age protection (#956)',
 541             'info_dict': {
 542                 'id': '07FYdnEawAQ',
 543                 'ext': 'mp4',
 544                 'upload_date': '20130703',
 545                 'title': 'Justin Timberlake - Tunnel Vision (Official Music Video) (Explicit)',
 546                 'alt_title': 'Tunnel Vision',
 547                 'description': 'md5:07dab3356cde4199048e4c7cd93471e1',
 548                 'duration': 419,
 549                 'uploader': 'justintimberlakeVEVO',
 550                 'uploader_id': 'justintimberlakeVEVO',
 551                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/justintimberlakeVEVO',
 552                 'creator': 'Justin Timberlake',
 553                 'track': 'Tunnel Vision',
 554                 'artist': 'Justin Timberlake',
 555                 'age_limit': 18,
 556             }
 557         },
 558         {
 559             'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
 560             'note': 'Embed-only video (#1746)',
 561             'info_dict': {
 562                 'id': 'yZIXLfi8CZQ',
 563                 'ext': 'mp4',
 564                 'upload_date': '20120608',
 565                 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
 566                 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
 567                 'uploader': 'SET India',
 568                 'uploader_id': 'setindia',
 569                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
 570                 'age_limit': 18,
 571             }
 572         },
 573         {
 574             'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=UxxajLWwzqY',
 575             'note': 'Use the first video ID in the URL',
 576             'info_dict': {
 577                 'id': 'BaW_jenozKc',
 578                 'ext': 'mp4',
 579                 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
 580                 'uploader': 'Philipp Hagemeister',
 581                 'uploader_id': 'phihag',
 582                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
 583                 'upload_date': '20121002',
 584                 'description': 'test chars:  "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
 585                 'categories': ['Science & Technology'],
 586                 'tags': ['youtube-dl'],
 587                 'duration': 10,
 588                 'view_count': int,
 589                 'like_count': int,
 590                 'dislike_count': int,
 591             },
 592             'params': {
 593                 'skip_download': True,
 594             },
 595         },
 596         {
 597             'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
 598             'note': '256k DASH audio (format 141) via DASH manifest',
 599             'info_dict': {
 600                 'id': 'a9LDPn-MO4I',
 601                 'ext': 'm4a',
 602                 'upload_date': '20121002',
 603                 'uploader_id': '8KVIDEO',
 604                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
 605                 'description': '',
 606                 'uploader': '8KVIDEO',
 607                 'title': 'UHDTV TEST 8K VIDEO.mp4'
 608             },
 609             'params': {
 610                 'youtube_include_dash_manifest': True,
 611                 'format': '141',
 612             },
 613             'skip': 'format 141 not served anymore',
 614         },
 615         # DASH manifest with encrypted signature
 616         {
 617             'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
 618             'info_dict': {
 619                 'id': 'IB3lcPjvWLA',
 620                 'ext': 'm4a',
 621                 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
 622                 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
 623                 'duration': 244,
 624                 'uploader': 'AfrojackVEVO',
 625                 'uploader_id': 'AfrojackVEVO',
 626                 'upload_date': '20131011',
 627             },
 628             'params': {
 629                 'youtube_include_dash_manifest': True,
 630                 'format': '141/bestaudio[ext=m4a]',
 631             },
 632         },
 633         # JS player signature function name containing $
 634         {
 635             'url': 'https://www.youtube.com/watch?v=nfWlot6h_JM',
 636             'info_dict': {
 637                 'id': 'nfWlot6h_JM',
 638                 'ext': 'm4a',
 639                 'title': 'Taylor Swift - Shake It Off',
 640                 'description': 'md5:bec2185232c05479482cb5a9b82719bf',
 641                 'duration': 242,
 642                 'uploader': 'TaylorSwiftVEVO',
 643                 'uploader_id': 'TaylorSwiftVEVO',
 644                 'upload_date': '20140818',
 645                 'creator': 'Taylor Swift',
 646             },
 647             'params': {
 648                 'youtube_include_dash_manifest': True,
 649                 'format': '141/bestaudio[ext=m4a]',
 650             },
 651         },
 652         # Controversy video
 653         {
 654             'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
 655             'info_dict': {
 656                 'id': 'T4XJQO3qol8',
 657                 'ext': 'mp4',
 658                 'duration': 219,
 659                 'upload_date': '20100909',
 660                 'uploader': 'Amazing Atheist',
 661                 'uploader_id': 'TheAmazingAtheist',
 662                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
 663                 'title': 'Burning Everyone\'s Koran',
 664                 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
 665             }
 666         },
 667         # Normal age-gate video (No vevo, embed allowed)
 668         {
 669             'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
 670             'info_dict': {
 671                 'id': 'HtVdAasjOgU',
 672                 'ext': 'mp4',
 673                 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
 674                 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
 675                 'duration': 142,
 676                 'uploader': 'The Witcher',
 677                 'uploader_id': 'WitcherGame',
 678                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
 679                 'upload_date': '20140605',
 680                 'age_limit': 18,
 681             },
 682         },
 683         # Age-gate video with encrypted signature
 684         {
 685             'url': 'https://www.youtube.com/watch?v=6kLq3WMV1nU',
 686             'info_dict': {
 687                 'id': '6kLq3WMV1nU',
 688                 'ext': 'mp4',
 689                 'title': 'Dedication To My Ex (Miss That) (Lyric Video)',
 690                 'description': 'md5:33765bb339e1b47e7e72b5490139bb41',
 691                 'duration': 246,
 692                 'uploader': 'LloydVEVO',
 693                 'uploader_id': 'LloydVEVO',
 694                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/LloydVEVO',
 695                 'upload_date': '20110629',
 696                 'age_limit': 18,
 697             },
 698         },
 699         # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
 700         # YouTube Red ad is not captured for creator
 701         {
 702             'url': '__2ABJjxzNo',
 703             'info_dict': {
 704                 'id': '__2ABJjxzNo',
 705                 'ext': 'mp4',
 706                 'duration': 266,
 707                 'upload_date': '20100430',
 708                 'uploader_id': 'deadmau5',
 709                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
 710                 'creator': 'deadmau5',
 711                 'description': 'md5:12c56784b8032162bb936a5f76d55360',
 712                 'uploader': 'deadmau5',
 713                 'title': 'Deadmau5 - Some Chords (HD)',
 714                 'alt_title': 'Some Chords',
 715             },
 716             'expected_warnings': [
 717                 'DASH manifest missing',
 718             ]
 719         },
 720         # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
 721         {
 722             'url': 'lqQg6PlCWgI',
 723             'info_dict': {
 724                 'id': 'lqQg6PlCWgI',
 725                 'ext': 'mp4',
 726                 'duration': 6085,
 727                 'upload_date': '20150827',
 728                 'uploader_id': 'olympic',
 729                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
 730                 'description': 'HO09  - Women -  GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
 731                 'uploader': 'Olympic',
 732                 'title': 'Hockey - Women -  GER-AUS - London 2012 Olympic Games',
 733             },
 734             'params': {
 735                 'skip_download': 'requires avconv',
 736             }
 737         },
 738         # Non-square pixels
 739         {
 740             'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
 741             'info_dict': {
 742                 'id': '_b-2C3KPAM0',
 743                 'ext': 'mp4',
 744                 'stretched_ratio': 16 / 9.,
 745                 'duration': 85,
 746                 'upload_date': '20110310',
 747                 'uploader_id': 'AllenMeow',
 748                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
 749                 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
 750                 'uploader': '孫ᄋᄅ',
 751                 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
 752             },
 753         },
 754         # url_encoded_fmt_stream_map is empty string
 755         {
 756             'url': 'qEJwOuvDf7I',
 757             'info_dict': {
 758                 'id': 'qEJwOuvDf7I',
 759                 'ext': 'webm',
 760                 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
 761                 'description': '',
 762                 'upload_date': '20150404',
 763                 'uploader_id': 'spbelect',
 764                 'uploader': 'Наблюдатели Петербурга',
 765             },
 766             'params': {
 767                 'skip_download': 'requires avconv',
 768             },
 769             'skip': 'This live event has ended.',
 770         },
 771         # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
 772         {
 773             'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
 774             'info_dict': {
 775                 'id': 'FIl7x6_3R5Y',
 776                 'ext': 'webm',
 777                 'title': 'md5:7b81415841e02ecd4313668cde88737a',
 778                 'description': 'md5:116377fd2963b81ec4ce64b542173306',
 779                 'duration': 220,
 780                 'upload_date': '20150625',
 781                 'uploader_id': 'dorappi2000',
 782                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
 783                 'uploader': 'dorappi2000',
 784                 'formats': 'mincount:31',
 785             },
 786             'skip': 'not actual anymore',
 787         },
 788         # DASH manifest with segment_list
 789         {
 790             'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
 791             'md5': '8ce563a1d667b599d21064e982ab9e31',
 792             'info_dict': {
 793                 'id': 'CsmdDsKjzN8',
 794                 'ext': 'mp4',
 795                 'upload_date': '20150501',  # According to '<meta itemprop="datePublished"', but in other places it's 20150510
 796                 'uploader': 'Airtek',
 797                 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
 798                 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
 799                 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
 800             },
 801             'params': {
 802                 'youtube_include_dash_manifest': True,
 803                 'format': '135',  # bestvideo
 804             },
 805             'skip': 'This live event has ended.',
 806         },
 807         {
 808             # Multifeed videos (multiple cameras), URL is for Main Camera
 809             'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
 810             'info_dict': {
 811                 'id': 'jqWvoWXjCVs',
 812                 'title': 'teamPGP: Rocket League Noob Stream',
 813                 'description': 'md5:dc7872fb300e143831327f1bae3af010',
 814             },
 815             'playlist': [{
 816                 'info_dict': {
 817                     'id': 'jqWvoWXjCVs',
 818                     'ext': 'mp4',
 819                     'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
 820                     'description': 'md5:dc7872fb300e143831327f1bae3af010',
 821                     'duration': 7335,
 822                     'upload_date': '20150721',
 823                     'uploader': 'Beer Games Beer',
 824                     'uploader_id': 'beergamesbeer',
 825                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
 826                     'license': 'Standard YouTube License',
 827                 },
 828             }, {
 829                 'info_dict': {
 830                     'id': '6h8e8xoXJzg',
 831                     'ext': 'mp4',
 832                     'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
 833                     'description': 'md5:dc7872fb300e143831327f1bae3af010',
 834                     'duration': 7337,
 835                     'upload_date': '20150721',
 836                     'uploader': 'Beer Games Beer',
 837                     'uploader_id': 'beergamesbeer',
 838                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
 839                     'license': 'Standard YouTube License',
 840                 },
 841             }, {
 842                 'info_dict': {
 843                     'id': 'PUOgX5z9xZw',
 844                     'ext': 'mp4',
 845                     'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
 846                     'description': 'md5:dc7872fb300e143831327f1bae3af010',
 847                     'duration': 7337,
 848                     'upload_date': '20150721',
 849                     'uploader': 'Beer Games Beer',
 850                     'uploader_id': 'beergamesbeer',
 851                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
 852                     'license': 'Standard YouTube License',
 853                 },
 854             }, {
 855                 'info_dict': {
 856                     'id': 'teuwxikvS5k',
 857                     'ext': 'mp4',
 858                     'title': 'teamPGP: Rocket League Noob Stream (zim)',
 859                     'description': 'md5:dc7872fb300e143831327f1bae3af010',
 860                     'duration': 7334,
 861                     'upload_date': '20150721',
 862                     'uploader': 'Beer Games Beer',
 863                     'uploader_id': 'beergamesbeer',
 864                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
 865                     'license': 'Standard YouTube License',
 866                 },
 867             }],
 868             'params': {
 869                 'skip_download': True,
 870             },
 871             'skip': 'This video is not available.',
 872         },
 873         {
 874             # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
 875             'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
 876             'info_dict': {
 877                 'id': 'gVfLd0zydlo',
 878                 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
 879             },
 880             'playlist_count': 2,
 881             'skip': 'Not multifeed anymore',
 882         },
 883         {
 884             'url': 'https://vid.plus/FlRa-iH7PGw',
 885             'only_matching': True,
 886         },
 887         {
 888             'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
 889             'only_matching': True,
 890         },
 891         {
 892             # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
 893             # Also tests cut-off URL expansion in video description (see
 894             # https://github.com/ytdl-org/youtube-dl/issues/1892,
 895             # https://github.com/ytdl-org/youtube-dl/issues/8164)
 896             'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
 897             'info_dict': {
 898                 'id': 'lsguqyKfVQg',
 899                 'ext': 'mp4',
 900                 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
 901                 'alt_title': 'Dark Walk - Position Music',
 902                 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
 903                 'duration': 133,
 904                 'upload_date': '20151119',
 905                 'uploader_id': 'IronSoulElf',
 906                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
 907                 'uploader': 'IronSoulElf',
 908                 'creator': 'Todd Haberman,  Daniel Law Heath and Aaron Kaplan',
 909                 'track': 'Dark Walk - Position Music',
 910                 'artist': 'Todd Haberman,  Daniel Law Heath and Aaron Kaplan',
 911             },
 912             'params': {
 913                 'skip_download': True,
 914             },
 915         },
 916         {
 917             # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
 918             'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
 919             'only_matching': True,
 920         },
 921         {
 922             # Video with yt:stretch=17:0
 923             'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
 924             'info_dict': {
 925                 'id': 'Q39EVAstoRM',
 926                 'ext': 'mp4',
 927                 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
 928                 'description': 'md5:ee18a25c350637c8faff806845bddee9',
 929                 'upload_date': '20151107',
 930                 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
 931                 'uploader': 'CH GAMER DROID',
 932             },
 933             'params': {
 934                 'skip_download': True,
 935             },
 936             'skip': 'This video does not exist.',
 937         },
 938         {
 939             # Video licensed under Creative Commons
 940             'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
 941             'info_dict': {
 942                 'id': 'M4gD1WSo5mA',
 943                 'ext': 'mp4',
 944                 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
 945                 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
 946                 'duration': 721,
 947                 'upload_date': '20150127',
 948                 'uploader_id': 'BerkmanCenter',
 949                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
 950                 'uploader': 'The Berkman Klein Center for Internet & Society',
 951                 'license': 'Creative Commons Attribution license (reuse allowed)',
 952             },
 953             'params': {
 954                 'skip_download': True,
 955             },
 956         },
 957         {
 958             # Channel-like uploader_url
 959             'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
 960             'info_dict': {
 961                 'id': 'eQcmzGIKrzg',
 962                 'ext': 'mp4',
 963                 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
 964                 'description': 'md5:dda0d780d5a6e120758d1711d062a867',
 965                 'duration': 4060,
 966                 'upload_date': '20151119',
 967                 'uploader': 'Bernie Sanders',
 968                 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
 969                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
 970                 'license': 'Creative Commons Attribution license (reuse allowed)',
 971             },
 972             'params': {
 973                 'skip_download': True,
 974             },
 975         },
 976         {
 977             'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
 978             'only_matching': True,
 979         },
 980         {
 981             # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
 982             'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
 983             'only_matching': True,
 984         },
 985         {
 986             # Rental video preview
 987             'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
 988             'info_dict': {
 989                 'id': 'uGpuVWrhIzE',
 990                 'ext': 'mp4',
 991                 'title': 'Piku - Trailer',
 992                 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
 993                 'upload_date': '20150811',
 994                 'uploader': 'FlixMatrix',
 995                 'uploader_id': 'FlixMatrixKaravan',
 996                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
 997                 'license': 'Standard YouTube License',
 998             },
 999             'params': {
1000                 'skip_download': True,
1001             },
1002             'skip': 'This video is not available.',
1003         },
1004         {
1005             # YouTube Red video with episode data
1006             'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1007             'info_dict': {
1008                 'id': 'iqKdEhx-dD4',
1009                 'ext': 'mp4',
1010                 'title': 'Isolation - Mind Field (Ep 1)',
1011                 'description': 'md5:46a29be4ceffa65b92d277b93f463c0f',
1012                 'duration': 2085,
1013                 'upload_date': '20170118',
1014                 'uploader': 'Vsauce',
1015                 'uploader_id': 'Vsauce',
1016                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
1017                 'series': 'Mind Field',
1018                 'season_number': 1,
1019                 'episode_number': 1,
1020             },
1021             'params': {
1022                 'skip_download': True,
1023             },
1024             'expected_warnings': [
1025                 'Skipping DASH manifest',
1026             ],
1027         },
1028         {
1029             # The following content has been identified by the YouTube community
1030             # as inappropriate or offensive to some audiences.
1031             'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1032             'info_dict': {
1033                 'id': '6SJNVb0GnPI',
1034                 'ext': 'mp4',
1035                 'title': 'Race Differences in Intelligence',
1036                 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1037                 'duration': 965,
1038                 'upload_date': '20140124',
1039                 'uploader': 'New Century Foundation',
1040                 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1041                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1042             },
1043             'params': {
1044                 'skip_download': True,
1045             },
1046         },
1047         {
1048             # itag 212
1049             'url': '1t24XAntNCY',
1050             'only_matching': True,
1051         },
1052         {
1053             # geo restricted to JP
1054             'url': 'sJL6WA-aGkQ',
1055             'only_matching': True,
1056         },
1057         {
1058             'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
1059             'only_matching': True,
1060         },
1061         {
1062             'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1063             'only_matching': True,
1064         },
1065         {
1066             # DRM protected
1067             'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1068             'only_matching': True,
1069         },
1070         {
1071             # Video with unsupported adaptive stream type formats
1072             'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1073             'info_dict': {
1074                 'id': 'Z4Vy8R84T1U',
1075                 'ext': 'mp4',
1076                 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1077                 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1078                 'duration': 433,
1079                 'upload_date': '20130923',
1080                 'uploader': 'Amelia Putri Harwita',
1081                 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1082                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1083                 'formats': 'maxcount:10',
1084             },
1085             'params': {
1086                 'skip_download': True,
1087                 'youtube_include_dash_manifest': False,
1088             },
1089         },
1090         {
1091             # Youtube Music Auto-generated description
1092             'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1093             'info_dict': {
1094                 'id': 'MgNrAu2pzNs',
1095                 'ext': 'mp4',
1096                 'title': 'Voyeur Girl',
1097                 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1098                 'upload_date': '20190312',
1099                 'uploader': 'Various Artists - Topic',
1100                 'uploader_id': 'UCVWKBi1ELZn0QX2CBLSkiyw',
1101                 'artist': 'Stephen',
1102                 'track': 'Voyeur Girl',
1103                 'album': 'it\'s too much love to know my dear',
1104                 'release_date': '20190313',
1105                 'release_year': 2019,
1106             },
1107             'params': {
1108                 'skip_download': True,
1109             },
1110         },
1111         {
1112             # Youtube Music Auto-generated description
1113             # Retrieve 'artist' field from 'Artist:' in video description
1114             # when it is present on youtube music video
1115             'url': 'https://www.youtube.com/watch?v=k0jLE7tTwjY',
1116             'info_dict': {
1117                 'id': 'k0jLE7tTwjY',
1118                 'ext': 'mp4',
1119                 'title': 'Latch Feat. Sam Smith',
1120                 'description': 'md5:3cb1e8101a7c85fcba9b4fb41b951335',
1121                 'upload_date': '20150110',
1122                 'uploader': 'Various Artists - Topic',
1123                 'uploader_id': 'UCNkEcmYdjrH4RqtNgh7BZ9w',
1124                 'artist': 'Disclosure',
1125                 'track': 'Latch Feat. Sam Smith',
1126                 'album': 'Latch Featuring Sam Smith',
1127                 'release_date': '20121008',
1128                 'release_year': 2012,
1129             },
1130             'params': {
1131                 'skip_download': True,
1132             },
1133         },
1134         {
1135             # Youtube Music Auto-generated description
1136             # handle multiple artists on youtube music video
1137             'url': 'https://www.youtube.com/watch?v=74qn0eJSjpA',
1138             'info_dict': {
1139                 'id': '74qn0eJSjpA',
1140                 'ext': 'mp4',
1141                 'title': 'Eastside',
1142                 'description': 'md5:290516bb73dcbfab0dcc4efe6c3de5f2',
1143                 'upload_date': '20180710',
1144                 'uploader': 'Benny Blanco - Topic',
1145                 'uploader_id': 'UCzqz_ksRu_WkIzmivMdIS7A',
1146                 'artist': 'benny blanco, Halsey, Khalid',
1147                 'track': 'Eastside',
1148                 'album': 'Eastside',
1149                 'release_date': '20180713',
1150                 'release_year': 2018,
1151             },
1152             'params': {
1153                 'skip_download': True,
1154             },
1155         },
1156         {
1157             # Youtube Music Auto-generated description
1158             # handle youtube music video with release_year and no release_date
1159             'url': 'https://www.youtube.com/watch?v=-hcAI0g-f5M',
1160             'info_dict': {
1161                 'id': '-hcAI0g-f5M',
1162                 'ext': 'mp4',
1163                 'title': 'Put It On Me',
1164                 'description': 'md5:93c55acc682ae7b0c668f2e34e1c069e',
1165                 'upload_date': '20180426',
1166                 'uploader': 'Matt Maeson - Topic',
1167                 'uploader_id': 'UCnEkIGqtGcQMLk73Kp-Q5LQ',
1168                 'artist': 'Matt Maeson',
1169                 'track': 'Put It On Me',
1170                 'album': 'The Hearse',
1171                 'release_date': None,
1172                 'release_year': 2018,
1173             },
1174             'params': {
1175                 'skip_download': True,
1176             },
1177         },
1178     ]
1179
1180     def __init__(self, *args, **kwargs):
1181         super(YoutubeIE, self).__init__(*args, **kwargs)
1182         self._player_cache = {}
1183
1184     def report_video_info_webpage_download(self, video_id):
1185         """Report attempt to download video info webpage."""
1186         self.to_screen('%s: Downloading video info webpage' % video_id)
1187
1188     def report_information_extraction(self, video_id):
1189         """Report attempt to extract video information."""
1190         self.to_screen('%s: Extracting video information' % video_id)
1191
1192     def report_unavailable_format(self, video_id, format):
1193         """Report extracted video URL."""
1194         self.to_screen('%s: Format %s not available' % (video_id, format))
1195
1196     def report_rtmp_download(self):
1197         """Indicate the download will use the RTMP protocol."""
1198         self.to_screen('RTMP download detected')
1199
1200     def _signature_cache_id(self, example_sig):
1201         """ Return a string representation of a signature """
1202         return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
1203
1204     def _extract_signature_function(self, video_id, player_url, example_sig):
1205         id_m = re.match(
1206             r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|(?:/[a-z]{2,3}_[A-Z]{2})?/base)?\.(?P<ext>[a-z]+)$',
1207             player_url)
1208         if not id_m:
1209             raise ExtractorError('Cannot identify player %r' % player_url)
1210         player_type = id_m.group('ext')
1211         player_id = id_m.group('id')
1212
1213         # Read from filesystem cache
1214         func_id = '%s_%s_%s' % (
1215             player_type, player_id, self._signature_cache_id(example_sig))
1216         assert os.path.basename(func_id) == func_id
1217
1218         cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
1219         if cache_spec is not None:
1220             return lambda s: ''.join(s[i] for i in cache_spec)
1221
1222         download_note = (
1223             'Downloading player %s' % player_url
1224             if self._downloader.params.get('verbose') else
1225             'Downloading %s player %s' % (player_type, player_id)
1226         )
1227         if player_type == 'js':
1228             code = self._download_webpage(
1229                 player_url, video_id,
1230                 note=download_note,
1231                 errnote='Download of %s failed' % player_url)
1232             res = self._parse_sig_js(code)
1233         elif player_type == 'swf':
1234             urlh = self._request_webpage(
1235                 player_url, video_id,
1236                 note=download_note,
1237                 errnote='Download of %s failed' % player_url)
1238             code = urlh.read()
1239             res = self._parse_sig_swf(code)
1240         else:
1241             assert False, 'Invalid player type %r' % player_type
1242
1243         test_string = ''.join(map(compat_chr, range(len(example_sig))))
1244         cache_res = res(test_string)
1245         cache_spec = [ord(c) for c in cache_res]
1246
1247         self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
1248         return res
1249
1250     def _print_sig_code(self, func, example_sig):
1251         def gen_sig_code(idxs):
1252             def _genslice(start, end, step):
1253                 starts = '' if start == 0 else str(start)
1254                 ends = (':%d' % (end + step)) if end + step >= 0 else ':'
1255                 steps = '' if step == 1 else (':%d' % step)
1256                 return 's[%s%s%s]' % (starts, ends, steps)
1257
1258             step = None
1259             # Quelch pyflakes warnings - start will be set when step is set
1260             start = '(Never used)'
1261             for i, prev in zip(idxs[1:], idxs[:-1]):
1262                 if step is not None:
1263                     if i - prev == step:
1264                         continue
1265                     yield _genslice(start, prev, step)
1266                     step = None
1267                     continue
1268                 if i - prev in [-1, 1]:
1269                     step = i - prev
1270                     start = prev
1271                     continue
1272                 else:
1273                     yield 's[%d]' % prev
1274             if step is None:
1275                 yield 's[%d]' % i
1276             else:
1277                 yield _genslice(start, i, step)
1278
1279         test_string = ''.join(map(compat_chr, range(len(example_sig))))
1280         cache_res = func(test_string)
1281         cache_spec = [ord(c) for c in cache_res]
1282         expr_code = ' + '.join(gen_sig_code(cache_spec))
1283         signature_id_tuple = '(%s)' % (
1284             ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
1285         code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
1286                 '    return %s\n') % (signature_id_tuple, expr_code)
1287         self.to_screen('Extracted signature function:\n' + code)
1288
1289     def _parse_sig_js(self, jscode):
1290         funcname = self._search_regex(
1291             (r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1292              r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
1293              r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*c\s*&&\s*d\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?(?P<sig>[a-zA-Z0-9$]+)\(',
1294              r'\bc\s*&&\s*d\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1295              r'\bc\s*&&\s*d\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
1296             jscode, 'Initial JS player signature function name', group='sig')
1297
1298         jsi = JSInterpreter(jscode)
1299         initial_function = jsi.extract_function(funcname)
1300         return lambda s: initial_function([s])
1301
1302     def _parse_sig_swf(self, file_contents):
1303         swfi = SWFInterpreter(file_contents)
1304         TARGET_CLASSNAME = 'SignatureDecipher'
1305         searched_class = swfi.extract_class(TARGET_CLASSNAME)
1306         initial_function = swfi.extract_function(searched_class, 'decipher')
1307         return lambda s: initial_function([s])
1308
1309     def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
1310         """Turn the encrypted s field into a working signature"""
1311
1312         if player_url is None:
1313             raise ExtractorError('Cannot decrypt signature without player_url')
1314
1315         if player_url.startswith('//'):
1316             player_url = 'https:' + player_url
1317         elif not re.match(r'https?://', player_url):
1318             player_url = compat_urlparse.urljoin(
1319                 'https://www.youtube.com', player_url)
1320         try:
1321             player_id = (player_url, self._signature_cache_id(s))
1322             if player_id not in self._player_cache:
1323                 func = self._extract_signature_function(
1324                     video_id, player_url, s
1325                 )
1326                 self._player_cache[player_id] = func
1327             func = self._player_cache[player_id]
1328             if self._downloader.params.get('youtube_print_sig_code'):
1329                 self._print_sig_code(func, s)
1330             return func(s)
1331         except Exception as e:
1332             tb = traceback.format_exc()
1333             raise ExtractorError(
1334                 'Signature extraction failed: ' + tb, cause=e)
1335
1336     def _get_subtitles(self, video_id, webpage):
1337         try:
1338             subs_doc = self._download_xml(
1339                 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
1340                 video_id, note=False)
1341         except ExtractorError as err:
1342             self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err))
1343             return {}
1344
1345         sub_lang_list = {}
1346         for track in subs_doc.findall('track'):
1347             lang = track.attrib['lang_code']
1348             if lang in sub_lang_list:
1349                 continue
1350             sub_formats = []
1351             for ext in self._SUBTITLE_FORMATS:
1352                 params = compat_urllib_parse_urlencode({
1353                     'lang': lang,
1354                     'v': video_id,
1355                     'fmt': ext,
1356                     'name': track.attrib['name'].encode('utf-8'),
1357                 })
1358                 sub_formats.append({
1359                     'url': 'https://www.youtube.com/api/timedtext?' + params,
1360                     'ext': ext,
1361                 })
1362             sub_lang_list[lang] = sub_formats
1363         if not sub_lang_list:
1364             self._downloader.report_warning('video doesn\'t have subtitles')
1365             return {}
1366         return sub_lang_list
1367
1368     def _get_ytplayer_config(self, video_id, webpage):
1369         patterns = (
1370             # User data may contain arbitrary character sequences that may affect
1371             # JSON extraction with regex, e.g. when '};' is contained the second
1372             # regex won't capture the whole JSON. Yet working around by trying more
1373             # concrete regex first keeping in mind proper quoted string handling
1374             # to be implemented in future that will replace this workaround (see
1375             # https://github.com/ytdl-org/youtube-dl/issues/7468,
1376             # https://github.com/ytdl-org/youtube-dl/pull/7599)
1377             r';ytplayer\.config\s*=\s*({.+?});ytplayer',
1378             r';ytplayer\.config\s*=\s*({.+?});',
1379         )
1380         config = self._search_regex(
1381             patterns, webpage, 'ytplayer.config', default=None)
1382         if config:
1383             return self._parse_json(
1384                 uppercase_escape(config), video_id, fatal=False)
1385
1386     def _get_automatic_captions(self, video_id, webpage):
1387         """We need the webpage for getting the captions url, pass it as an
1388            argument to speed up the process."""
1389         self.to_screen('%s: Looking for automatic captions' % video_id)
1390         player_config = self._get_ytplayer_config(video_id, webpage)
1391         err_msg = 'Couldn\'t find automatic captions for %s' % video_id
1392         if not player_config:
1393             self._downloader.report_warning(err_msg)
1394             return {}
1395         try:
1396             args = player_config['args']
1397             caption_url = args.get('ttsurl')
1398             if caption_url:
1399                 timestamp = args['timestamp']
1400                 # We get the available subtitles
1401                 list_params = compat_urllib_parse_urlencode({
1402                     'type': 'list',
1403                     'tlangs': 1,
1404                     'asrs': 1,
1405                 })
1406                 list_url = caption_url + '&' + list_params
1407                 caption_list = self._download_xml(list_url, video_id)
1408                 original_lang_node = caption_list.find('track')
1409                 if original_lang_node is None:
1410                     self._downloader.report_warning('Video doesn\'t have automatic captions')
1411                     return {}
1412                 original_lang = original_lang_node.attrib['lang_code']
1413                 caption_kind = original_lang_node.attrib.get('kind', '')
1414
1415                 sub_lang_list = {}
1416                 for lang_node in caption_list.findall('target'):
1417                     sub_lang = lang_node.attrib['lang_code']
1418                     sub_formats = []
1419                     for ext in self._SUBTITLE_FORMATS:
1420                         params = compat_urllib_parse_urlencode({
1421                             'lang': original_lang,
1422                             'tlang': sub_lang,
1423                             'fmt': ext,
1424                             'ts': timestamp,
1425                             'kind': caption_kind,
1426                         })
1427                         sub_formats.append({
1428                             'url': caption_url + '&' + params,
1429                             'ext': ext,
1430                         })
1431                     sub_lang_list[sub_lang] = sub_formats
1432                 return sub_lang_list
1433
1434             def make_captions(sub_url, sub_langs):
1435                 parsed_sub_url = compat_urllib_parse_urlparse(sub_url)
1436                 caption_qs = compat_parse_qs(parsed_sub_url.query)
1437                 captions = {}
1438                 for sub_lang in sub_langs:
1439                     sub_formats = []
1440                     for ext in self._SUBTITLE_FORMATS:
1441                         caption_qs.update({
1442                             'tlang': [sub_lang],
1443                             'fmt': [ext],
1444                         })
1445                         sub_url = compat_urlparse.urlunparse(parsed_sub_url._replace(
1446                             query=compat_urllib_parse_urlencode(caption_qs, True)))
1447                         sub_formats.append({
1448                             'url': sub_url,
1449                             'ext': ext,
1450                         })
1451                     captions[sub_lang] = sub_formats
1452                 return captions
1453
1454             # New captions format as of 22.06.2017
1455             player_response = args.get('player_response')
1456             if player_response and isinstance(player_response, compat_str):
1457                 player_response = self._parse_json(
1458                     player_response, video_id, fatal=False)
1459                 if player_response:
1460                     renderer = player_response['captions']['playerCaptionsTracklistRenderer']
1461                     base_url = renderer['captionTracks'][0]['baseUrl']
1462                     sub_lang_list = []
1463                     for lang in renderer['translationLanguages']:
1464                         lang_code = lang.get('languageCode')
1465                         if lang_code:
1466                             sub_lang_list.append(lang_code)
1467                     return make_captions(base_url, sub_lang_list)
1468
1469             # Some videos don't provide ttsurl but rather caption_tracks and
1470             # caption_translation_languages (e.g. 20LmZk1hakA)
1471             # Does not used anymore as of 22.06.2017
1472             caption_tracks = args['caption_tracks']
1473             caption_translation_languages = args['caption_translation_languages']
1474             caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
1475             sub_lang_list = []
1476             for lang in caption_translation_languages.split(','):
1477                 lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang))
1478                 sub_lang = lang_qs.get('lc', [None])[0]
1479                 if sub_lang:
1480                     sub_lang_list.append(sub_lang)
1481             return make_captions(caption_url, sub_lang_list)
1482         # An extractor error can be raise by the download process if there are
1483         # no automatic captions but there are subtitles
1484         except (KeyError, IndexError, ExtractorError):
1485             self._downloader.report_warning(err_msg)
1486             return {}
1487
1488     def _mark_watched(self, video_id, video_info, player_response):
1489         playback_url = url_or_none(try_get(
1490             player_response,
1491             lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']) or try_get(
1492             video_info, lambda x: x['videostats_playback_base_url'][0]))
1493         if not playback_url:
1494             return
1495         parsed_playback_url = compat_urlparse.urlparse(playback_url)
1496         qs = compat_urlparse.parse_qs(parsed_playback_url.query)
1497
1498         # cpn generation algorithm is reverse engineered from base.js.
1499         # In fact it works even with dummy cpn.
1500         CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
1501         cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
1502
1503         qs.update({
1504             'ver': ['2'],
1505             'cpn': [cpn],
1506         })
1507         playback_url = compat_urlparse.urlunparse(
1508             parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
1509
1510         self._download_webpage(
1511             playback_url, video_id, 'Marking watched',
1512             'Unable to mark watched', fatal=False)
1513
1514     @staticmethod
1515     def _extract_urls(webpage):
1516         # Embedded YouTube player
1517         entries = [
1518             unescapeHTML(mobj.group('url'))
1519             for mobj in re.finditer(r'''(?x)
1520             (?:
1521                 <iframe[^>]+?src=|
1522                 data-video-url=|
1523                 <embed[^>]+?src=|
1524                 embedSWF\(?:\s*|
1525                 <object[^>]+data=|
1526                 new\s+SWFObject\(
1527             )
1528             (["\'])
1529                 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1530                 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
1531             \1''', webpage)]
1532
1533         # lazyYT YouTube embed
1534         entries.extend(list(map(
1535             unescapeHTML,
1536             re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))
1537
1538         # Wordpress "YouTube Video Importer" plugin
1539         matches = re.findall(r'''(?x)<div[^>]+
1540             class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
1541             data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
1542         entries.extend(m[-1] for m in matches)
1543
1544         return entries
1545
1546     @staticmethod
1547     def _extract_url(webpage):
1548         urls = YoutubeIE._extract_urls(webpage)
1549         return urls[0] if urls else None
1550
1551     @classmethod
1552     def extract_id(cls, url):
1553         mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
1554         if mobj is None:
1555             raise ExtractorError('Invalid URL: %s' % url)
1556         video_id = mobj.group(2)
1557         return video_id
1558
1559     def _extract_annotations(self, video_id):
1560         url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id
1561         return self._download_webpage(url, video_id, note='Searching for annotations.', errnote='Unable to download video annotations.')
1562
1563     @staticmethod
1564     def _extract_chapters(description, duration):
1565         if not description:
1566             return None
1567         chapter_lines = re.findall(
1568             r'(?:^|<br\s*/>)([^<]*<a[^>]+onclick=["\']yt\.www\.watch\.player\.seekTo[^>]+>(\d{1,2}:\d{1,2}(?::\d{1,2})?)</a>[^>]*)(?=$|<br\s*/>)',
1569             description)
1570         if not chapter_lines:
1571             return None
1572         chapters = []
1573         for next_num, (chapter_line, time_point) in enumerate(
1574                 chapter_lines, start=1):
1575             start_time = parse_duration(time_point)
1576             if start_time is None:
1577                 continue
1578             if start_time > duration:
1579                 break
1580             end_time = (duration if next_num == len(chapter_lines)
1581                         else parse_duration(chapter_lines[next_num][1]))
1582             if end_time is None:
1583                 continue
1584             if end_time > duration:
1585                 end_time = duration
1586             if start_time > end_time:
1587                 break
1588             chapter_title = re.sub(
1589                 r'<a[^>]+>[^<]+</a>', '', chapter_line).strip(' \t-')
1590             chapter_title = re.sub(r'\s+', ' ', chapter_title)
1591             chapters.append({
1592                 'start_time': start_time,
1593                 'end_time': end_time,
1594                 'title': chapter_title,
1595             })
1596         return chapters
1597
1598     def _real_extract(self, url):
1599         url, smuggled_data = unsmuggle_url(url, {})
1600
1601         proto = (
1602             'http' if self._downloader.params.get('prefer_insecure', False)
1603             else 'https')
1604
1605         start_time = None
1606         end_time = None
1607         parsed_url = compat_urllib_parse_urlparse(url)
1608         for component in [parsed_url.fragment, parsed_url.query]:
1609             query = compat_parse_qs(component)
1610             if start_time is None and 't' in query:
1611                 start_time = parse_duration(query['t'][0])
1612             if start_time is None and 'start' in query:
1613                 start_time = parse_duration(query['start'][0])
1614             if end_time is None and 'end' in query:
1615                 end_time = parse_duration(query['end'][0])
1616
1617         # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1618         mobj = re.search(self._NEXT_URL_RE, url)
1619         if mobj:
1620             url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
1621         video_id = self.extract_id(url)
1622
1623         # Get video webpage
1624         url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
1625         video_webpage = self._download_webpage(url, video_id)
1626
1627         # Attempt to extract SWF player URL
1628         mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
1629         if mobj is not None:
1630             player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1631         else:
1632             player_url = None
1633
1634         dash_mpds = []
1635
1636         def add_dash_mpd(video_info):
1637             dash_mpd = video_info.get('dashmpd')
1638             if dash_mpd and dash_mpd[0] not in dash_mpds:
1639                 dash_mpds.append(dash_mpd[0])
1640
1641         def add_dash_mpd_pr(pl_response):
1642             dash_mpd = url_or_none(try_get(
1643                 pl_response, lambda x: x['streamingData']['dashManifestUrl'],
1644                 compat_str))
1645             if dash_mpd and dash_mpd not in dash_mpds:
1646                 dash_mpds.append(dash_mpd)
1647
1648         is_live = None
1649         view_count = None
1650
1651         def extract_view_count(v_info):
1652             return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
1653
1654         player_response = {}
1655
1656         # Get video info
1657         embed_webpage = None
1658         if re.search(r'player-age-gate-content">', video_webpage) is not None:
1659             age_gate = True
1660             # We simulate the access to the video from www.youtube.com/v/{video_id}
1661             # this can be viewed without login into Youtube
1662             url = proto + '://www.youtube.com/embed/%s' % video_id
1663             embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
1664             data = compat_urllib_parse_urlencode({
1665                 'video_id': video_id,
1666                 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1667                 'sts': self._search_regex(
1668                     r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
1669             })
1670             video_info_url = proto + '://www.youtube.com/get_video_info?' + data
1671             video_info_webpage = self._download_webpage(
1672                 video_info_url, video_id,
1673                 note='Refetching age-gated info webpage',
1674                 errnote='unable to download video info webpage')
1675             video_info = compat_parse_qs(video_info_webpage)
1676             add_dash_mpd(video_info)
1677         else:
1678             age_gate = False
1679             video_info = None
1680             sts = None
1681             # Try looking directly into the video webpage
1682             ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
1683             if ytplayer_config:
1684                 args = ytplayer_config['args']
1685                 if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
1686                     # Convert to the same format returned by compat_parse_qs
1687                     video_info = dict((k, [v]) for k, v in args.items())
1688                     add_dash_mpd(video_info)
1689                 # Rental video is not rented but preview is available (e.g.
1690                 # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
1691                 # https://github.com/ytdl-org/youtube-dl/issues/10532)
1692                 if not video_info and args.get('ypc_vid'):
1693                     return self.url_result(
1694                         args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
1695                 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1696                     is_live = True
1697                 sts = ytplayer_config.get('sts')
1698                 if not player_response:
1699                     pl_response = str_or_none(args.get('player_response'))
1700                     if pl_response:
1701                         pl_response = self._parse_json(pl_response, video_id, fatal=False)
1702                         if isinstance(pl_response, dict):
1703                             player_response = pl_response
1704             if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
1705                 add_dash_mpd_pr(player_response)
1706                 # We also try looking in get_video_info since it may contain different dashmpd
1707                 # URL that points to a DASH manifest with possibly different itag set (some itags
1708                 # are missing from DASH manifest pointed by webpage's dashmpd, some - from DASH
1709                 # manifest pointed by get_video_info's dashmpd).
1710                 # The general idea is to take a union of itags of both DASH manifests (for example
1711                 # video with such 'manifest behavior' see https://github.com/ytdl-org/youtube-dl/issues/6093)
1712                 self.report_video_info_webpage_download(video_id)
1713                 for el in ('info', 'embedded', 'detailpage', 'vevo', ''):
1714                     query = {
1715                         'video_id': video_id,
1716                         'ps': 'default',
1717                         'eurl': '',
1718                         'gl': 'US',
1719                         'hl': 'en',
1720                     }
1721                     if el:
1722                         query['el'] = el
1723                     if sts:
1724                         query['sts'] = sts
1725                     video_info_webpage = self._download_webpage(
1726                         '%s://www.youtube.com/get_video_info' % proto,
1727                         video_id, note=False,
1728                         errnote='unable to download video info webpage',
1729                         fatal=False, query=query)
1730                     if not video_info_webpage:
1731                         continue
1732                     get_video_info = compat_parse_qs(video_info_webpage)
1733                     if not player_response:
1734                         pl_response = get_video_info.get('player_response', [None])[0]
1735                         if isinstance(pl_response, dict):
1736                             player_response = pl_response
1737                             add_dash_mpd_pr(player_response)
1738                     add_dash_mpd(get_video_info)
1739                     if view_count is None:
1740                         view_count = extract_view_count(get_video_info)
1741                     if not video_info:
1742                         video_info = get_video_info
1743                     get_token = get_video_info.get('token') or get_video_info.get('account_playback_token')
1744                     if get_token:
1745                         # Different get_video_info requests may report different results, e.g.
1746                         # some may report video unavailability, but some may serve it without
1747                         # any complaint (see https://github.com/ytdl-org/youtube-dl/issues/7362,
1748                         # the original webpage as well as el=info and el=embedded get_video_info
1749                         # requests report video unavailability due to geo restriction while
1750                         # el=detailpage succeeds and returns valid data). This is probably
1751                         # due to YouTube measures against IP ranges of hosting providers.
1752                         # Working around by preferring the first succeeded video_info containing
1753                         # the token if no such video_info yet was found.
1754                         token = video_info.get('token') or video_info.get('account_playback_token')
1755                         if not token:
1756                             video_info = get_video_info
1757                         break
1758
1759         def extract_unavailable_message():
1760             return self._html_search_regex(
1761                 r'(?s)<h1[^>]+id="unavailable-message"[^>]*>(.+?)</h1>',
1762                 video_webpage, 'unavailable message', default=None)
1763
1764         if not video_info:
1765             unavailable_message = extract_unavailable_message()
1766             if not unavailable_message:
1767                 unavailable_message = 'Unable to extract video data'
1768             raise ExtractorError(
1769                 'YouTube said: %s' % unavailable_message, expected=True, video_id=video_id)
1770
1771         token = video_info.get('token') or video_info.get('account_playback_token')
1772         if not token:
1773             if 'reason' in video_info:
1774                 if 'The uploader has not made this video available in your country.' in video_info['reason']:
1775                     regions_allowed = self._html_search_meta(
1776                         'regionsAllowed', video_webpage, default=None)
1777                     countries = regions_allowed.split(',') if regions_allowed else None
1778                     self.raise_geo_restricted(
1779                         msg=video_info['reason'][0], countries=countries)
1780                 reason = video_info['reason'][0]
1781                 if 'Invalid parameters' in reason:
1782                     unavailable_message = extract_unavailable_message()
1783                     if unavailable_message:
1784                         reason = unavailable_message
1785                 raise ExtractorError(
1786                     'YouTube said: %s' % reason,
1787                     expected=True, video_id=video_id)
1788             else:
1789                 raise ExtractorError(
1790                     '"token" parameter not in video info for unknown reason',
1791                     video_id=video_id)
1792
1793         if video_info.get('license_info'):
1794             raise ExtractorError('This video is DRM protected.', expected=True)
1795
1796         video_details = try_get(
1797             player_response, lambda x: x['videoDetails'], dict) or {}
1798
1799         # title
1800         if 'title' in video_info:
1801             video_title = video_info['title'][0]
1802         elif 'title' in player_response:
1803             video_title = video_details['title']
1804         else:
1805             self._downloader.report_warning('Unable to extract video title')
1806             video_title = '_'
1807
1808         # description
1809         description_original = video_description = get_element_by_id("eow-description", video_webpage)
1810         if video_description:
1811
1812             def replace_url(m):
1813                 redir_url = compat_urlparse.urljoin(url, m.group(1))
1814                 parsed_redir_url = compat_urllib_parse_urlparse(redir_url)
1815                 if re.search(r'^(?:www\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)$', parsed_redir_url.netloc) and parsed_redir_url.path == '/redirect':
1816                     qs = compat_parse_qs(parsed_redir_url.query)
1817                     q = qs.get('q')
1818                     if q and q[0]:
1819                         return q[0]
1820                 return redir_url
1821
1822             description_original = video_description = re.sub(r'''(?x)
1823                 <a\s+
1824                     (?:[a-zA-Z-]+="[^"]*"\s+)*?
1825                     (?:title|href)="([^"]+)"\s+
1826                     (?:[a-zA-Z-]+="[^"]*"\s+)*?
1827                     class="[^"]*"[^>]*>
1828                 [^<]+\.{3}\s*
1829                 </a>
1830             ''', replace_url, video_description)
1831             video_description = clean_html(video_description)
1832         else:
1833             fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
1834             if fd_mobj:
1835                 video_description = unescapeHTML(fd_mobj.group(1))
1836             else:
1837                 video_description = ''
1838
1839         if not smuggled_data.get('force_singlefeed', False):
1840             if not self._downloader.params.get('noplaylist'):
1841                 multifeed_metadata_list = try_get(
1842                     player_response,
1843                     lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
1844                     compat_str) or try_get(
1845                     video_info, lambda x: x['multifeed_metadata_list'][0], compat_str)
1846                 if multifeed_metadata_list:
1847                     entries = []
1848                     feed_ids = []
1849                     for feed in multifeed_metadata_list.split(','):
1850                         # Unquote should take place before split on comma (,) since textual
1851                         # fields may contain comma as well (see
1852                         # https://github.com/ytdl-org/youtube-dl/issues/8536)
1853                         feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
1854                         entries.append({
1855                             '_type': 'url_transparent',
1856                             'ie_key': 'Youtube',
1857                             'url': smuggle_url(
1858                                 '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
1859                                 {'force_singlefeed': True}),
1860                             'title': '%s (%s)' % (video_title, feed_data['title'][0]),
1861                         })
1862                         feed_ids.append(feed_data['id'][0])
1863                     self.to_screen(
1864                         'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
1865                         % (', '.join(feed_ids), video_id))
1866                     return self.playlist_result(entries, video_id, video_title, video_description)
1867             else:
1868                 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
1869
1870         if view_count is None:
1871             view_count = extract_view_count(video_info)
1872         if view_count is None and video_details:
1873             view_count = int_or_none(video_details.get('viewCount'))
1874
1875         # Check for "rental" videos
1876         if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
1877             raise ExtractorError('"rental" videos not supported. See https://github.com/ytdl-org/youtube-dl/issues/359 for more information.', expected=True)
1878
1879         def _extract_filesize(media_url):
1880             return int_or_none(self._search_regex(
1881                 r'\bclen[=/](\d+)', media_url, 'filesize', default=None))
1882
1883         if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1884             self.report_rtmp_download()
1885             formats = [{
1886                 'format_id': '_rtmp',
1887                 'protocol': 'rtmp',
1888                 'url': video_info['conn'][0],
1889                 'player_url': player_url,
1890             }]
1891         elif not is_live and (len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1):
1892             encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
1893             if 'rtmpe%3Dyes' in encoded_url_map:
1894                 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/ytdl-org/youtube-dl/issues/343 for more information.', expected=True)
1895             formats_spec = {}
1896             fmt_list = video_info.get('fmt_list', [''])[0]
1897             if fmt_list:
1898                 for fmt in fmt_list.split(','):
1899                     spec = fmt.split('/')
1900                     if len(spec) > 1:
1901                         width_height = spec[1].split('x')
1902                         if len(width_height) == 2:
1903                             formats_spec[spec[0]] = {
1904                                 'resolution': spec[1],
1905                                 'width': int_or_none(width_height[0]),
1906                                 'height': int_or_none(width_height[1]),
1907                             }
1908             q = qualities(['small', 'medium', 'hd720'])
1909             streaming_formats = try_get(player_response, lambda x: x['streamingData']['formats'], list)
1910             if streaming_formats:
1911                 for fmt in streaming_formats:
1912                     itag = str_or_none(fmt.get('itag'))
1913                     if not itag:
1914                         continue
1915                     quality = fmt.get('quality')
1916                     quality_label = fmt.get('qualityLabel') or quality
1917                     formats_spec[itag] = {
1918                         'asr': int_or_none(fmt.get('audioSampleRate')),
1919                         'filesize': int_or_none(fmt.get('contentLength')),
1920                         'format_note': quality_label,
1921                         'fps': int_or_none(fmt.get('fps')),
1922                         'height': int_or_none(fmt.get('height')),
1923                         'quality': q(quality),
1924                         # bitrate for itag 43 is always 2147483647
1925                         'tbr': float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000) if itag != '43' else None,
1926                         'width': int_or_none(fmt.get('width')),
1927                     }
1928             formats = []
1929             for url_data_str in encoded_url_map.split(','):
1930                 url_data = compat_parse_qs(url_data_str)
1931                 if 'itag' not in url_data or 'url' not in url_data:
1932                     continue
1933                 stream_type = int_or_none(try_get(url_data, lambda x: x['stream_type'][0]))
1934                 # Unsupported FORMAT_STREAM_TYPE_OTF
1935                 if stream_type == 3:
1936                     continue
1937                 format_id = url_data['itag'][0]
1938                 url = url_data['url'][0]
1939
1940                 if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
1941                     ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
1942                     jsplayer_url_json = self._search_regex(
1943                         ASSETS_RE,
1944                         embed_webpage if age_gate else video_webpage,
1945                         'JS player URL (1)', default=None)
1946                     if not jsplayer_url_json and not age_gate:
1947                         # We need the embed website after all
1948                         if embed_webpage is None:
1949                             embed_url = proto + '://www.youtube.com/embed/%s' % video_id
1950                             embed_webpage = self._download_webpage(
1951                                 embed_url, video_id, 'Downloading embed webpage')
1952                         jsplayer_url_json = self._search_regex(
1953                             ASSETS_RE, embed_webpage, 'JS player URL')
1954
1955                     player_url = json.loads(jsplayer_url_json)
1956                     if player_url is None:
1957                         player_url_json = self._search_regex(
1958                             r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
1959                             video_webpage, 'age gate player URL')
1960                         player_url = json.loads(player_url_json)
1961
1962                 if 'sig' in url_data:
1963                     url += '&signature=' + url_data['sig'][0]
1964                 elif 's' in url_data:
1965                     encrypted_sig = url_data['s'][0]
1966
1967                     if self._downloader.params.get('verbose'):
1968                         if player_url is None:
1969                             player_version = 'unknown'
1970                             player_desc = 'unknown'
1971                         else:
1972                             if player_url.endswith('swf'):
1973                                 player_version = self._search_regex(
1974                                     r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
1975                                     'flash player', fatal=False)
1976                                 player_desc = 'flash player %s' % player_version
1977                             else:
1978                                 player_version = self._search_regex(
1979                                     [r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js',
1980                                      r'(?:www|player(?:_ias)?)-([^/]+)(?:/[a-z]{2,3}_[A-Z]{2})?/base\.js'],
1981                                     player_url,
1982                                     'html5 player', fatal=False)
1983                                 player_desc = 'html5 player %s' % player_version
1984
1985                         parts_sizes = self._signature_cache_id(encrypted_sig)
1986                         self.to_screen('{%s} signature length %s, %s' %
1987                                        (format_id, parts_sizes, player_desc))
1988
1989                     signature = self._decrypt_signature(
1990                         encrypted_sig, video_id, player_url, age_gate)
1991                     url += '&signature=' + signature
1992                 if 'ratebypass' not in url:
1993                     url += '&ratebypass=yes'
1994
1995                 dct = {
1996                     'format_id': format_id,
1997                     'url': url,
1998                     'player_url': player_url,
1999                 }
2000                 if format_id in self._formats:
2001                     dct.update(self._formats[format_id])
2002                 if format_id in formats_spec:
2003                     dct.update(formats_spec[format_id])
2004
2005                 # Some itags are not included in DASH manifest thus corresponding formats will
2006                 # lack metadata (see https://github.com/ytdl-org/youtube-dl/pull/5993).
2007                 # Trying to extract metadata from url_encoded_fmt_stream_map entry.
2008                 mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
2009                 width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
2010
2011                 filesize = int_or_none(url_data.get(
2012                     'clen', [None])[0]) or _extract_filesize(url)
2013
2014                 quality = url_data.get('quality', [None])[0]
2015
2016                 more_fields = {
2017                     'filesize': filesize,
2018                     'tbr': float_or_none(url_data.get('bitrate', [None])[0], 1000),
2019                     'width': width,
2020                     'height': height,
2021                     'fps': int_or_none(url_data.get('fps', [None])[0]),
2022                     'format_note': url_data.get('quality_label', [None])[0] or quality,
2023                     'quality': q(quality),
2024                 }
2025                 for key, value in more_fields.items():
2026                     if value:
2027                         dct[key] = value
2028                 type_ = url_data.get('type', [None])[0]
2029                 if type_:
2030                     type_split = type_.split(';')
2031                     kind_ext = type_split[0].split('/')
2032                     if len(kind_ext) == 2:
2033                         kind, _ = kind_ext
2034                         dct['ext'] = mimetype2ext(type_split[0])
2035                         if kind in ('audio', 'video'):
2036                             codecs = None
2037                             for mobj in re.finditer(
2038                                     r'(?P<key>[a-zA-Z_-]+)=(?P<quote>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):
2039                                 if mobj.group('key') == 'codecs':
2040                                     codecs = mobj.group('val')
2041                                     break
2042                             if codecs:
2043                                 dct.update(parse_codecs(codecs))
2044                 if dct.get('acodec') == 'none' or dct.get('vcodec') == 'none':
2045                     dct['downloader_options'] = {
2046                         # Youtube throttles chunks >~10M
2047                         'http_chunk_size': 10485760,
2048                     }
2049                 formats.append(dct)
2050         else:
2051             manifest_url = (
2052                 url_or_none(try_get(
2053                     player_response,
2054                     lambda x: x['streamingData']['hlsManifestUrl'],
2055                     compat_str)) or
2056                 url_or_none(try_get(
2057                     video_info, lambda x: x['hlsvp'][0], compat_str)))
2058             if manifest_url:
2059                 formats = []
2060                 m3u8_formats = self._extract_m3u8_formats(
2061                     manifest_url, video_id, 'mp4', fatal=False)
2062                 for a_format in m3u8_formats:
2063                     itag = self._search_regex(
2064                         r'/itag/(\d+)/', a_format['url'], 'itag', default=None)
2065                     if itag:
2066                         a_format['format_id'] = itag
2067                         if itag in self._formats:
2068                             dct = self._formats[itag].copy()
2069                             dct.update(a_format)
2070                             a_format = dct
2071                     a_format['player_url'] = player_url
2072                     # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
2073                     a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
2074                     formats.append(a_format)
2075             else:
2076                 error_message = clean_html(video_info.get('reason', [None])[0])
2077                 if not error_message:
2078                     error_message = extract_unavailable_message()
2079                 if error_message:
2080                     raise ExtractorError(error_message, expected=True)
2081                 raise ExtractorError('no conn, hlsvp, hlsManifestUrl or url_encoded_fmt_stream_map information found in video info')
2082
2083         # uploader
2084         video_uploader = try_get(
2085             video_info, lambda x: x['author'][0],
2086             compat_str) or str_or_none(video_details.get('author'))
2087         if video_uploader:
2088             video_uploader = compat_urllib_parse_unquote_plus(video_uploader)
2089         else:
2090             self._downloader.report_warning('unable to extract uploader name')
2091
2092         # uploader_id
2093         video_uploader_id = None
2094         video_uploader_url = None
2095         mobj = re.search(
2096             r'<link itemprop="url" href="(?P<uploader_url>https?://www\.youtube\.com/(?:user|channel)/(?P<uploader_id>[^"]+))">',
2097             video_webpage)
2098         if mobj is not None:
2099             video_uploader_id = mobj.group('uploader_id')
2100             video_uploader_url = mobj.group('uploader_url')
2101         else:
2102             self._downloader.report_warning('unable to extract uploader nickname')
2103
2104         channel_id = self._html_search_meta(
2105             'channelId', video_webpage, 'channel id')
2106         channel_url = 'http://www.youtube.com/channel/%s' % channel_id if channel_id else None
2107
2108         # thumbnail image
2109         # We try first to get a high quality image:
2110         m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
2111                             video_webpage, re.DOTALL)
2112         if m_thumb is not None:
2113             video_thumbnail = m_thumb.group(1)
2114         elif 'thumbnail_url' not in video_info:
2115             self._downloader.report_warning('unable to extract video thumbnail')
2116             video_thumbnail = None
2117         else:   # don't panic if we can't find it
2118             video_thumbnail = compat_urllib_parse_unquote_plus(video_info['thumbnail_url'][0])
2119
2120         # upload date
2121         upload_date = self._html_search_meta(
2122             'datePublished', video_webpage, 'upload date', default=None)
2123         if not upload_date:
2124             upload_date = self._search_regex(
2125                 [r'(?s)id="eow-date.*?>(.*?)</span>',
2126                  r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'],
2127                 video_webpage, 'upload date', default=None)
2128         upload_date = unified_strdate(upload_date)
2129
2130         video_license = self._html_search_regex(
2131             r'<h4[^>]+class="title"[^>]*>\s*License\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li',
2132             video_webpage, 'license', default=None)
2133
2134         m_music = re.search(
2135             r'''(?x)
2136                 <h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*
2137                 <ul[^>]*>\s*
2138                 <li>(?P<title>.+?)
2139                 by (?P<creator>.+?)
2140                 (?:
2141                     \(.+?\)|
2142                     <a[^>]*
2143                         (?:
2144                             \bhref=["\']/red[^>]*>|             # drop possible
2145                             >\s*Listen ad-free with YouTube Red # YouTube Red ad
2146                         )
2147                     .*?
2148                 )?</li
2149             ''',
2150             video_webpage)
2151         if m_music:
2152             video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
2153             video_creator = clean_html(m_music.group('creator'))
2154         else:
2155             video_alt_title = video_creator = None
2156
2157         def extract_meta(field):
2158             return self._html_search_regex(
2159                 r'<h4[^>]+class="title"[^>]*>\s*%s\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li>\s*' % field,
2160                 video_webpage, field, default=None)
2161
2162         track = extract_meta('Song')
2163         artist = extract_meta('Artist')
2164
2165         # Youtube Music Auto-generated description
2166         album = release_date = release_year = None
2167         if video_description:
2168             mobj = re.search(r'(?s)Provided to YouTube by [^\n]+\n+(?P<track>[^·]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?', video_description)
2169             if mobj:
2170                 if not track:
2171                     track = mobj.group('track').strip()
2172                 if not artist:
2173                     artist = mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·'))
2174                 album = mobj.group('album'.strip())
2175                 release_year = mobj.group('release_year')
2176                 release_date = mobj.group('release_date')
2177                 if release_date:
2178                     release_date = release_date.replace('-', '')
2179                     if not release_year:
2180                         release_year = int(release_date[:4])
2181                 if release_year:
2182                     release_year = int(release_year)
2183
2184         m_episode = re.search(
2185             r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
2186             video_webpage)
2187         if m_episode:
2188             series = unescapeHTML(m_episode.group('series'))
2189             season_number = int(m_episode.group('season'))
2190             episode_number = int(m_episode.group('episode'))
2191         else:
2192             series = season_number = episode_number = None
2193
2194         m_cat_container = self._search_regex(
2195             r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
2196             video_webpage, 'categories', default=None)
2197         if m_cat_container:
2198             category = self._html_search_regex(
2199                 r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
2200                 default=None)
2201             video_categories = None if category is None else [category]
2202         else:
2203             video_categories = None
2204
2205         video_tags = [
2206             unescapeHTML(m.group('content'))
2207             for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
2208
2209         def _extract_count(count_name):
2210             return str_to_int(self._search_regex(
2211                 r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>'
2212                 % re.escape(count_name),
2213                 video_webpage, count_name, default=None))
2214
2215         like_count = _extract_count('like')
2216         dislike_count = _extract_count('dislike')
2217
2218         if view_count is None:
2219             view_count = str_to_int(self._search_regex(
2220                 r'<[^>]+class=["\']watch-view-count[^>]+>\s*([\d,\s]+)', video_webpage,
2221                 'view count', default=None))
2222
2223         # subtitles
2224         video_subtitles = self.extract_subtitles(video_id, video_webpage)
2225         automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
2226
2227         video_duration = try_get(
2228             video_info, lambda x: int_or_none(x['length_seconds'][0]))
2229         if not video_duration:
2230             video_duration = int_or_none(video_details.get('lengthSeconds'))
2231         if not video_duration:
2232             video_duration = parse_duration(self._html_search_meta(
2233                 'duration', video_webpage, 'video duration'))
2234
2235         # annotations
2236         video_annotations = None
2237         if self._downloader.params.get('writeannotations', False):
2238             video_annotations = self._extract_annotations(video_id)
2239
2240         chapters = self._extract_chapters(description_original, video_duration)
2241
2242         # Look for the DASH manifest
2243         if self._downloader.params.get('youtube_include_dash_manifest', True):
2244             dash_mpd_fatal = True
2245             for mpd_url in dash_mpds:
2246                 dash_formats = {}
2247                 try:
2248                     def decrypt_sig(mobj):
2249                         s = mobj.group(1)
2250                         dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
2251                         return '/signature/%s' % dec_s
2252
2253                     mpd_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, mpd_url)
2254
2255                     for df in self._extract_mpd_formats(
2256                             mpd_url, video_id, fatal=dash_mpd_fatal,
2257                             formats_dict=self._formats):
2258                         if not df.get('filesize'):
2259                             df['filesize'] = _extract_filesize(df['url'])
2260                         # Do not overwrite DASH format found in some previous DASH manifest
2261                         if df['format_id'] not in dash_formats:
2262                             dash_formats[df['format_id']] = df
2263                         # Additional DASH manifests may end up in HTTP Error 403 therefore
2264                         # allow them to fail without bug report message if we already have
2265                         # some DASH manifest succeeded. This is temporary workaround to reduce
2266                         # burst of bug reports until we figure out the reason and whether it
2267                         # can be fixed at all.
2268                         dash_mpd_fatal = False
2269                 except (ExtractorError, KeyError) as e:
2270                     self.report_warning(
2271                         'Skipping DASH manifest: %r' % e, video_id)
2272                 if dash_formats:
2273                     # Remove the formats we found through non-DASH, they
2274                     # contain less info and it can be wrong, because we use
2275                     # fixed values (for example the resolution). See
2276                     # https://github.com/ytdl-org/youtube-dl/issues/5774 for an
2277                     # example.
2278                     formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
2279                     formats.extend(dash_formats.values())
2280
2281         # Check for malformed aspect ratio
2282         stretched_m = re.search(
2283             r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
2284             video_webpage)
2285         if stretched_m:
2286             w = float(stretched_m.group('w'))
2287             h = float(stretched_m.group('h'))
2288             # yt:stretch may hold invalid ratio data (e.g. for Q39EVAstoRM ratio is 17:0).
2289             # We will only process correct ratios.
2290             if w > 0 and h > 0:
2291                 ratio = w / h
2292                 for f in formats:
2293                     if f.get('vcodec') != 'none':
2294                         f['stretched_ratio'] = ratio
2295
2296         self._sort_formats(formats)
2297
2298         self.mark_watched(video_id, video_info, player_response)
2299
2300         return {
2301             'id': video_id,
2302             'uploader': video_uploader,
2303             'uploader_id': video_uploader_id,
2304             'uploader_url': video_uploader_url,
2305             'channel_id': channel_id,
2306             'channel_url': channel_url,
2307             'upload_date': upload_date,
2308             'license': video_license,
2309             'creator': video_creator or artist,
2310             'title': video_title,
2311             'alt_title': video_alt_title or track,
2312             'thumbnail': video_thumbnail,
2313             'description': video_description,
2314             'categories': video_categories,
2315             'tags': video_tags,
2316             'subtitles': video_subtitles,
2317             'automatic_captions': automatic_captions,
2318             'duration': video_duration,
2319             'age_limit': 18 if age_gate else 0,
2320             'annotations': video_annotations,
2321             'chapters': chapters,
2322             'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
2323             'view_count': view_count,
2324             'like_count': like_count,
2325             'dislike_count': dislike_count,
2326             'average_rating': float_or_none(video_info.get('avg_rating', [None])[0]),
2327             'formats': formats,
2328             'is_live': is_live,
2329             'start_time': start_time,
2330             'end_time': end_time,
2331             'series': series,
2332             'season_number': season_number,
2333             'episode_number': episode_number,
2334             'track': track,
2335             'artist': artist,
2336             'album': album,
2337             'release_date': release_date,
2338             'release_year': release_year,
2339         }
2340
2341
2342 class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
2343     IE_DESC = 'YouTube.com playlists'
2344     _VALID_URL = r"""(?x)(?:
2345                         (?:https?://)?
2346                         (?:\w+\.)?
2347                         (?:
2348                             (?:
2349                                 youtube\.com|
2350                                 invidio\.us
2351                             )
2352                             /
2353                             (?:
2354                                (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/(?:videoseries|[0-9A-Za-z_-]{11}))
2355                                \? (?:.*?[&;])*? (?:p|a|list)=
2356                             |  p/
2357                             )|
2358                             youtu\.be/[0-9A-Za-z_-]{11}\?.*?\blist=
2359                         )
2360                         (
2361                             (?:PL|LL|EC|UU|FL|RD|UL|TL|OLAK5uy_)?[0-9A-Za-z-_]{10,}
2362                             # Top tracks, they can also include dots
2363                             |(?:MC)[\w\.]*
2364                         )
2365                         .*
2366                      |
2367                         (%(playlist_id)s)
2368                      )""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
2369     _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
2370     _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&amp;[^"]*?index=(?P<index>\d+)(?:[^>]+>(?P<title>[^<]+))?'
2371     IE_NAME = 'youtube:playlist'
2372     _TESTS = [{
2373         'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
2374         'info_dict': {
2375             'title': 'ytdl test PL',
2376             'id': 'PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
2377         },
2378         'playlist_count': 3,
2379     }, {
2380         'url': 'https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
2381         'info_dict': {
2382             'id': 'PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
2383             'title': 'YDL_Empty_List',
2384         },
2385         'playlist_count': 0,
2386         'skip': 'This playlist is private',
2387     }, {
2388         'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
2389         'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2390         'info_dict': {
2391             'title': '29C3: Not my department',
2392             'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2393         },
2394         'playlist_count': 95,
2395     }, {
2396         'note': 'issue #673',
2397         'url': 'PLBB231211A4F62143',
2398         'info_dict': {
2399             'title': '[OLD]Team Fortress 2 (Class-based LP)',
2400             'id': 'PLBB231211A4F62143',
2401         },
2402         'playlist_mincount': 26,
2403     }, {
2404         'note': 'Large playlist',
2405         'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
2406         'info_dict': {
2407             'title': 'Uploads from Cauchemar',
2408             'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
2409         },
2410         'playlist_mincount': 799,
2411     }, {
2412         'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
2413         'info_dict': {
2414             'title': 'YDL_safe_search',
2415             'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
2416         },
2417         'playlist_count': 2,
2418         'skip': 'This playlist is private',
2419     }, {
2420         'note': 'embedded',
2421         'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
2422         'playlist_count': 4,
2423         'info_dict': {
2424             'title': 'JODA15',
2425             'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
2426         }
2427     }, {
2428         'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
2429         'playlist_mincount': 485,
2430         'info_dict': {
2431             'title': '2017 華語最新單曲 (2/24更新)',
2432             'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
2433         }
2434     }, {
2435         'note': 'Embedded SWF player',
2436         'url': 'https://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
2437         'playlist_count': 4,
2438         'info_dict': {
2439             'title': 'JODA7',
2440             'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ',
2441         }
2442     }, {
2443         'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
2444         'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
2445         'info_dict': {
2446             'title': 'Uploads from Interstellar Movie',
2447             'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
2448         },
2449         'playlist_mincount': 21,
2450     }, {
2451         # Playlist URL that does not actually serve a playlist
2452         'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
2453         'info_dict': {
2454             'id': 'FqZTN594JQw',
2455             'ext': 'webm',
2456             'title': "Smiley's People 01 detective, Adventure Series, Action",
2457             'uploader': 'STREEM',
2458             'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
2459             'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
2460             'upload_date': '20150526',
2461             'license': 'Standard YouTube License',
2462             'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
2463             'categories': ['People & Blogs'],
2464             'tags': list,
2465             'view_count': int,
2466             'like_count': int,
2467             'dislike_count': int,
2468         },
2469         'params': {
2470             'skip_download': True,
2471         },
2472         'add_ie': [YoutubeIE.ie_key()],
2473     }, {
2474         'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
2475         'info_dict': {
2476             'id': 'yeWKywCrFtk',
2477             'ext': 'mp4',
2478             'title': 'Small Scale Baler and Braiding Rugs',
2479             'uploader': 'Backus-Page House Museum',
2480             'uploader_id': 'backuspagemuseum',
2481             'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
2482             'upload_date': '20161008',
2483             'license': 'Standard YouTube License',
2484             'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
2485             'categories': ['Nonprofits & Activism'],
2486             'tags': list,
2487             'like_count': int,
2488             'dislike_count': int,
2489         },
2490         'params': {
2491             'noplaylist': True,
2492             'skip_download': True,
2493         },
2494     }, {
2495         'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
2496         'only_matching': True,
2497     }, {
2498         'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
2499         'only_matching': True,
2500     }, {
2501         # music album playlist
2502         'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
2503         'only_matching': True,
2504     }, {
2505         'url': 'https://invidio.us/playlist?list=PLDIoUOhQQPlXr63I_vwF9GD8sAKh77dWU',
2506         'only_matching': True,
2507     }]
2508
2509     def _real_initialize(self):
2510         self._login()
2511
2512     def _extract_mix(self, playlist_id):
2513         # The mixes are generated from a single video
2514         # the id of the playlist is just 'RD' + video_id
2515         ids = []
2516         last_id = playlist_id[-11:]
2517         for n in itertools.count(1):
2518             url = 'https://youtube.com/watch?v=%s&list=%s' % (last_id, playlist_id)
2519             webpage = self._download_webpage(
2520                 url, playlist_id, 'Downloading page {0} of Youtube mix'.format(n))
2521             new_ids = orderedSet(re.findall(
2522                 r'''(?xs)data-video-username=".*?".*?
2523                            href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id),
2524                 webpage))
2525             # Fetch new pages until all the videos are repeated, it seems that
2526             # there are always 51 unique videos.
2527             new_ids = [_id for _id in new_ids if _id not in ids]
2528             if not new_ids:
2529                 break
2530             ids.extend(new_ids)
2531             last_id = ids[-1]
2532
2533         url_results = self._ids_to_results(ids)
2534
2535         search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage)
2536         title_span = (
2537             search_title('playlist-title') or
2538             search_title('title long-title') or
2539             search_title('title'))
2540         title = clean_html(title_span)
2541
2542         return self.playlist_result(url_results, playlist_id, title)
2543
2544     def _extract_playlist(self, playlist_id):
2545         url = self._TEMPLATE_URL % playlist_id
2546         page = self._download_webpage(url, playlist_id)
2547
2548         # the yt-alert-message now has tabindex attribute (see https://github.com/ytdl-org/youtube-dl/issues/11604)
2549         for match in re.findall(r'<div class="yt-alert-message"[^>]*>([^<]+)</div>', page):
2550             match = match.strip()
2551             # Check if the playlist exists or is private
2552             mobj = re.match(r'[^<]*(?:The|This) playlist (?P<reason>does not exist|is private)[^<]*', match)
2553             if mobj:
2554                 reason = mobj.group('reason')
2555                 message = 'This playlist %s' % reason
2556                 if 'private' in reason:
2557                     message += ', use --username or --netrc to access it'
2558                 message += '.'
2559                 raise ExtractorError(message, expected=True)
2560             elif re.match(r'[^<]*Invalid parameters[^<]*', match):
2561                 raise ExtractorError(
2562                     'Invalid parameters. Maybe URL is incorrect.',
2563                     expected=True)
2564             elif re.match(r'[^<]*Choose your language[^<]*', match):
2565                 continue
2566             else:
2567                 self.report_warning('Youtube gives an alert message: ' + match)
2568
2569         playlist_title = self._html_search_regex(
2570             r'(?s)<h1 class="pl-header-title[^"]*"[^>]*>\s*(.*?)\s*</h1>',
2571             page, 'title', default=None)
2572
2573         _UPLOADER_BASE = r'class=["\']pl-header-details[^>]+>\s*<li>\s*<a[^>]+\bhref='
2574         uploader = self._search_regex(
2575             r'%s["\']/(?:user|channel)/[^>]+>([^<]+)' % _UPLOADER_BASE,
2576             page, 'uploader', default=None)
2577         mobj = re.search(
2578             r'%s(["\'])(?P<path>/(?:user|channel)/(?P<uploader_id>.+?))\1' % _UPLOADER_BASE,
2579             page)
2580         if mobj:
2581             uploader_id = mobj.group('uploader_id')
2582             uploader_url = compat_urlparse.urljoin(url, mobj.group('path'))
2583         else:
2584             uploader_id = uploader_url = None
2585
2586         has_videos = True
2587
2588         if not playlist_title:
2589             try:
2590                 # Some playlist URLs don't actually serve a playlist (e.g.
2591                 # https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4)
2592                 next(self._entries(page, playlist_id))
2593             except StopIteration:
2594                 has_videos = False
2595
2596         playlist = self.playlist_result(
2597             self._entries(page, playlist_id), playlist_id, playlist_title)
2598         playlist.update({
2599             'uploader': uploader,
2600             'uploader_id': uploader_id,
2601             'uploader_url': uploader_url,
2602         })
2603
2604         return has_videos, playlist
2605
2606     def _check_download_just_video(self, url, playlist_id):
2607         # Check if it's a video-specific URL
2608         query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
2609         video_id = query_dict.get('v', [None])[0] or self._search_regex(
2610             r'(?:(?:^|//)youtu\.be/|youtube\.com/embed/(?!videoseries))([0-9A-Za-z_-]{11})', url,
2611             'video id', default=None)
2612         if video_id:
2613             if self._downloader.params.get('noplaylist'):
2614                 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
2615                 return video_id, self.url_result(video_id, 'Youtube', video_id=video_id)
2616             else:
2617                 self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
2618                 return video_id, None
2619         return None, None
2620
2621     def _real_extract(self, url):
2622         # Extract playlist id
2623         mobj = re.match(self._VALID_URL, url)
2624         if mobj is None:
2625             raise ExtractorError('Invalid URL: %s' % url)
2626         playlist_id = mobj.group(1) or mobj.group(2)
2627
2628         video_id, video = self._check_download_just_video(url, playlist_id)
2629         if video:
2630             return video
2631
2632         if playlist_id.startswith(('RD', 'UL', 'PU')):
2633             # Mixes require a custom extraction process
2634             return self._extract_mix(playlist_id)
2635
2636         has_videos, playlist = self._extract_playlist(playlist_id)
2637         if has_videos or not video_id:
2638             return playlist
2639
2640         # Some playlist URLs don't actually serve a playlist (see
2641         # https://github.com/ytdl-org/youtube-dl/issues/10537).
2642         # Fallback to plain video extraction if there is a video id
2643         # along with playlist id.
2644         return self.url_result(video_id, 'Youtube', video_id=video_id)
2645
2646
class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
    """Extractor for the videos of a channel addressed by a /channel/<id> URL."""
    IE_NAME = 'youtube:channel'
    IE_DESC = 'YouTube.com channels'
    _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com|(?:www\.)?invidio\.us)/channel/(?P<id>[0-9A-Za-z_-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos'
    _VIDEO_RE = r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?'
    _TESTS = [{
        'note': 'paginated channel',
        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
        'playlist_mincount': 91,
        'info_dict': {
            'id': 'UUKfVa3S1e4PHvxWcwyMMg8w',
            'title': 'Uploads from lex will',
        }
    }, {
        'note': 'Age restricted channel',
        # from https://www.youtube.com/user/DeusExOfficial
        'url': 'https://www.youtube.com/channel/UCs0ifCMCm1icqRbqhUINa0w',
        'playlist_mincount': 64,
        'info_dict': {
            'id': 'UUs0ifCMCm1icqRbqhUINa0w',
            'title': 'Uploads from Deus Ex',
        },
    }, {
        'url': 'https://invidio.us/channel/UC23qupoDRn9YOAVzeoxjOQA',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Decline URLs that the playlists or live extractors accept."""
        if YoutubePlaylistsIE.suitable(url) or YoutubeLiveIE.suitable(url):
            return False
        return super(YoutubeChannelIE, cls).suitable(url)

    def _build_template_url(self, url, channel_id):
        """Return the videos-page URL for ``channel_id``."""
        return self._TEMPLATE_URL % channel_id

    def _real_extract(self, url):
        """Extract a channel, preferably by delegating to its uploads playlist.

        Falls back to scraping the channel's videos page when no 'UC...'
        channel id can be obtained.  Raises an expected ExtractorError with
        the page's alert message when the page yields no entries and shows
        an alert.
        """
        channel_id = self._match_id(url)

        url = self._build_template_url(url, channel_id)

        # Channel by page listing is restricted to 35 pages of 30 items, i.e. 1050 videos total (see #5778)
        # Workaround by extracting as a playlist if managed to obtain channel playlist URL
        # otherwise fallback on channel by page extraction
        channel_playlist_id = False
        channel_page = self._download_webpage(
            url + '?view=57', channel_id,
            'Downloading channel page', fatal=False)
        if channel_page is not False:
            channel_playlist_id = self._html_search_meta(
                'channelId', channel_page, 'channel id', default=None)
            if not channel_playlist_id:
                # No channelId meta tag: look for the id inside the app links.
                channel_url = self._html_search_meta(
                    ('al:ios:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad'),
                    channel_page, 'channel url', default=None)
                if channel_url:
                    channel_playlist_id = self._search_regex(
                        r'vnd\.youtube://user/([0-9A-Za-z_-]+)',
                        channel_url, 'channel id', default=None)

        if channel_playlist_id and channel_playlist_id.startswith('UC'):
            # Swap the 'UC' prefix for 'UU' to address the uploads playlist.
            uploads_id = 'UU' + channel_playlist_id[2:]
            uploads_url = compat_urlparse.urljoin(url, '/playlist?list=%s' % uploads_id)
            return self.url_result(uploads_url, 'YoutubePlaylist')

        channel_page = self._download_webpage(url, channel_id, 'Downloading page #1')
        autogenerated = re.search(r'''(?x)
                class="[^"]*?(?:
                    channel-header-autogenerated-label|
                    yt-channel-title-autogenerated
                )[^"]*"''', channel_page) is not None

        if autogenerated:
            # The videos are contained in a single page
            # the ajax pages can't be used, they are empty
            entries = []
            for video_id, video_title in self.extract_videos_from_page(channel_page):
                entries.append(self.url_result(
                    video_id, 'Youtube', video_id=video_id,
                    video_title=video_title))
            return self.playlist_result(entries, channel_id)

        # Probe for a first entry; if there is none, surface any alert shown
        # on the page instead of silently returning an empty playlist.
        no_entry = object()
        if next(self._entries(channel_page, channel_id), no_entry) is no_entry:
            alert_message = self._html_search_regex(
                r'(?s)<div[^>]+class=(["\']).*?\byt-alert-message\b.*?\1[^>]*>(?P<alert>[^<]+)</div>',
                channel_page, 'alert', default=None, group='alert')
            if alert_message:
                raise ExtractorError('Youtube said: %s' % alert_message, expected=True)

        return self.playlist_result(self._entries(channel_page, channel_id), channel_id)
2739
2740
class YoutubeUserIE(YoutubeChannelIE):
    """Extractor for user pages (/user/<name>, /c/<name>, /<name>, ytuser:<name>)."""
    IE_NAME = 'youtube:user'
    IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:(?P<user>user|c)/)?(?!(?:attribution_link|watch|results|shared)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/%s/%s/videos'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheLinuxFoundation',
        'playlist_mincount': 320,
        'info_dict': {
            'id': 'UUfX55Sx5hEFjoC3cNs6mCUQ',
            'title': 'Uploads from The Linux Foundation',
        }
    }, {
        # Only available via https://www.youtube.com/c/12minuteathlete/videos
        # but not https://www.youtube.com/user/12minuteathlete/videos
        'url': 'https://www.youtube.com/c/12minuteathlete/videos',
        'playlist_mincount': 249,
        'info_dict': {
            'id': 'UUVjM-zV6_opMDx7WYxnjZiQ',
            'title': 'Uploads from 12 Minute Athlete',
        }
    }, {
        'url': 'ytuser:phihag',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/gametrailers',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/gametrailers',
        'only_matching': True,
    }, {
        # This channel is not available, geo restricted to JP
        'url': 'https://www.youtube.com/user/kananishinoSMEJ/videos',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Accept ``url`` only when no other Youtube*IE in this module does."""
        # Don't return True if the url can be extracted with another youtube
        # extractor: this regex is too permissive and would match as well.
        for name, klass in globals().items():
            if not (name.startswith('Youtube') and name.endswith('IE')):
                continue
            if klass is cls:
                continue
            if klass.suitable(url):
                return False
        return super(YoutubeUserIE, cls).suitable(url)

    def _build_template_url(self, url, channel_id):
        """Return the videos-page URL, keeping the URL's 'user' or 'c' path kind."""
        url_match = re.match(self._VALID_URL, url)
        # URLs without an explicit kind (bare name or ytuser:) default to 'user'.
        path_kind = url_match.group('user') or 'user'
        return self._TEMPLATE_URL % (path_kind, url_match.group('id'))
2791
2792
class YoutubeLiveIE(YoutubeBaseInfoExtractor):
    """Extractor for the .../live URL of a user or channel."""
    IE_NAME = 'youtube:live'
    IE_DESC = 'YouTube.com live streams'
    _VALID_URL = r'(?P<base_url>https?://(?:\w+\.)?youtube\.com/(?:(?:user|channel|c)/)?(?P<id>[^/]+))/live'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheYoungTurks/live',
        'info_dict': {
            'id': 'a48o2S1cPoo',
            'ext': 'mp4',
            'title': 'The Young Turks - Live Main Show',
            'uploader': 'The Young Turks',
            'uploader_id': 'TheYoungTurks',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
            'upload_date': '20150715',
            'license': 'Standard YouTube License',
            'description': 'md5:438179573adcdff3c97ebb1ee632b891',
            'categories': ['News & Politics'],
            'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/TheYoungTurks/live',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the video found on the live page, else defer to the base URL.

        The page download is non-fatal: when it fails, or when the page is
        not a video page carrying an 11-character video id, the result
        points at the URL with the trailing '/live' removed.
        """
        url_match = re.match(self._VALID_URL, url)
        channel_id, base_url = url_match.group('id', 'base_url')

        webpage = self._download_webpage(url, channel_id, fatal=False)
        if not webpage:
            return self.url_result(base_url)

        page_type = self._og_search_property(
            'type', webpage, 'page type', default='')
        video_id = self._html_search_meta(
            'videoId', webpage, 'video id', default=None)
        is_video_page = (
            page_type.startswith('video') and video_id
            and re.match(r'^[0-9A-Za-z_-]{11}$', video_id))
        if is_video_page:
            return self.url_result(video_id, YoutubeIE.ie_key())
        return self.url_result(base_url)
2843
2844
class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
    """Extractor for the /playlists page of a user or channel.

    Only declares URL matching and metadata; the extraction logic is
    inherited from the base class.
    """
    IE_NAME = 'youtube:playlists'
    IE_DESC = 'YouTube.com user/channel playlists'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/(?:user|channel)/(?P<id>[^/]+)/playlists'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
        'playlist_mincount': 4,
        'info_dict': {
            'id': 'ThirstForScience',
            'title': 'Thirst for Science',
        },
    }, {
        # with "Load more" button
        'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
        'playlist_mincount': 70,
        'info_dict': {
            'id': 'igorkle1',
            'title': 'Игорь Клейнер',
        },
    }, {
        'url': 'https://www.youtube.com/channel/UCiU1dHvZObB2iP6xkJ__Icw/playlists',
        'playlist_mincount': 17,
        'info_dict': {
            'id': 'UCiU1dHvZObB2iP6xkJ__Icw',
            'title': 'Chem Player',
        },
    }]
2873
2874
class YoutubeSearchBaseInfoExtractor(YoutubePlaylistBaseInfoExtractor):
    """Base class for search extractors; only overrides the video link regex."""
    # Matches a /watch link with an 11-character id and, optionally, the
    # title attribute that follows it.
    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})(?:[^"]*"[^>]+\btitle="(?P<title>[^"]+))?'
2877
2878
class YoutubeSearchIE(SearchInfoExtractor, YoutubeSearchBaseInfoExtractor):
    """Extractor for keyword searches using the 'ytsearch' search key."""
    IE_DESC = 'YouTube.com searches'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    # Extra query-string arguments merged into the results URL.
    _EXTRA_QUERY_ARGS = {}
    _TESTS = []

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query

        Result pages are downloaded one after another, following the page's
        "Next" link, until ``n`` videos have been collected, a page yields
        no video, or there is no further page.

        Raises an expected ExtractorError when a page carries a search
        message (no video results).  Returns a playlist result holding at
        most ``n`` entries.
        """
        videos = []

        url_query = {
            'search_query': query.encode('utf-8'),
        }
        url_query.update(self._EXTRA_QUERY_ARGS)
        result_url = 'https://www.youtube.com/results?' + compat_urllib_parse_urlencode(url_query)

        for pagenum in itertools.count(1):
            data = self._download_json(
                result_url, video_id='query "%s"' % query,
                note='Downloading page %s' % pagenum,
                errnote='Unable to download API page',
                query={'spf': 'navigate'})
            html_content = data[1]['body']['content']

            if 'class="search-message' in html_content:
                raise ExtractorError(
                    '[youtube] No video results', expected=True)

            new_videos = list(self._process_page(html_content))
            videos += new_videos
            # Stop as soon as enough results are available: with a strict
            # '>' an extra page was downloaded (and then discarded) whenever
            # exactly n results had already been collected.
            if not new_videos or len(videos) >= n:
                break
            next_link = self._html_search_regex(
                r'href="(/results\?[^"]*\bsp=[^"]+)"[^>]*>\s*<span[^>]+class="[^"]*\byt-uix-button-content\b[^"]*"[^>]*>Next',
                html_content, 'next link', default=None)
            if next_link is None:
                break
            result_url = compat_urlparse.urljoin('https://www.youtube.com/', next_link)

        # n may be float('inf') (see _MAX_RESULTS), which is not a valid
        # slice bound, so only slice when there really is a surplus.
        if len(videos) > n:
            videos = videos[:n]
        return self.playlist_result(videos, query)
2927
2928
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Search extractor variant that adds a sort-by-upload-date query argument."""
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    IE_DESC = 'YouTube.com searches, newest videos first'
    _SEARCH_KEY = 'ytsearchdate'
    _EXTRA_QUERY_ARGS = {'search_sort': 'video_date_uploaded'}
2934
2935
class YoutubeSearchURLIE(YoutubeSearchBaseInfoExtractor):
    """Extractor for /results?search_query=... (or q=...) search URLs."""
    IE_NAME = 'youtube:search_url'
    IE_DESC = 'YouTube.com search URLs'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Download the results page and return its videos as a playlist.

        The playlist is titled with the URL-decoded search query.
        """
        raw_query = re.match(self._VALID_URL, url).group('query')
        query = compat_urllib_parse_unquote_plus(raw_query)
        webpage = self._download_webpage(url, query)
        entries = self._process_page(webpage)
        return self.playlist_result(entries, playlist_title=query)
2956
2957
class YoutubeShowIE(YoutubePlaylistsBaseInfoExtractor):
    """Extractor for /show/<name> pages, handled through their playlists page."""
    IE_NAME = 'youtube:show'
    IE_DESC = 'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/show/(?P<id>[^?#]*)'
    _TESTS = [{
        'url': 'https://www.youtube.com/show/airdisasters',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'airdisasters',
            'title': 'Air Disasters',
        }
    }]

    def _real_extract(self, url):
        """Delegate to the base extractor using the show's /playlists URL."""
        show_id = self._match_id(url)
        playlists_url = 'https://www.youtube.com/show/%s/playlists' % show_id
        return super(YoutubeShowIE, self)._real_extract(playlists_url)
2975
2976
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
    """
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        """Extractor name, built from the subclass's _FEED_NAME."""
        return 'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        """Initialize the extractor by running its login routine."""
        self._login()

    def _entries(self, page):
        """Yield one result per distinct video id found in the feed.

        Starts from ``page`` and keeps following the "load more" link of
        the most recent portion.  Iteration ends when a portion contains no
        video id that has not been seen yet, or when no "load more" link is
        present.
        """
        # The extraction process is the same as for playlists, but the regex
        # for the video ids doesn't contain an index
        #
        # Ids are only remembered for membership tests, so keep them in a
        # set: a list made every lookup linear in the number of videos seen
        # so far, i.e. quadratic overall on long feeds.
        seen_ids = set()
        more_widget_html = content_html = page
        for page_num in itertools.count(1):
            matches = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)

            # 'recommended' feed has infinite 'load more' and each new portion spins
            # the same videos in (sometimes) slightly different order, so we'll check
            # for unicity and break when portion has no new videos
            new_ids = [
                video_id for video_id in orderedSet(matches)
                if video_id not in seen_ids]
            if not new_ids:
                break

            seen_ids.update(new_ids)

            for entry in self._ids_to_results(new_ids):
                yield entry

            mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
            if not mobj:
                break

            more = self._download_json(
                'https://youtube.com/%s' % mobj.group('more'), self._PLAYLIST_TITLE,
                'Downloading page #%s' % page_num,
                transform_source=uppercase_escape)
            content_html = more['content_html']
            more_widget_html = more['load_more_widget_html']

    def _real_extract(self, url):
        """Download the feed page and return its videos as a playlist.

        The feed URL is built from _FEED_NAME; ``url`` itself is not used.
        """
        page = self._download_webpage(
            'https://www.youtube.com/feed/%s' % self._FEED_NAME,
            self._PLAYLIST_TITLE)
        return self.playlist_result(
            self._entries(page), playlist_title=self._PLAYLIST_TITLE)
3028
3029
class YoutubeWatchLaterIE(YoutubePlaylistIE):
    """Extractor for the watch later list, i.e. the playlist with id 'WL'."""
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:feed/watch_later|(?:playlist|watch)\?(?:.+&)?list=WL)|:ytwatchlater'

    _TESTS = [{
        'url': 'https://www.youtube.com/playlist?list=WL',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?v=bCNU9TrbiRk&index=1&list=WL',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the single video named by ``url`` if requested, else the 'WL' playlist."""
        single_video = self._check_download_just_video(url, 'WL')[1]
        if single_video:
            return single_video
        # Only the playlist part of the (has_videos, playlist) pair is needed.
        return self._extract_playlist('WL')[1]
3049
3050
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Extractor resolving the favourites page to its underlying playlist."""
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
    _LOGIN_REQUIRED = True

    def _real_extract(self, url):
        """Find the playlist id on the favourites page and delegate to YoutubePlaylist.

        The favourites page is always downloaded from its fixed URL;
        ``url`` itself is not used.
        """
        favourites_page = self._download_webpage(
            'https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        playlist_id = self._search_regex(
            r'list=(.+?)["&]', favourites_page, 'favourites playlist id')
        return self.url_result(playlist_id, 'YoutubePlaylist')
3061
3062
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor configured for the 'recommended' feed."""
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = 'Youtube Recommended videos'
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/recommended|:ytrec(?:ommended)?'
3068
3069
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor configured for the 'subscriptions' feed."""
    # The shortcut accepted by _VALID_URL is ':ytsubs' (with the leading
    # colon); the description previously advertised a bare "ytsubs", which
    # this pattern does not match.
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" keyword (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = 'Youtube Subscriptions'
3075
3076
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor configured for the 'history' feed."""
    _FEED_NAME = 'history'
    _PLAYLIST_TITLE = 'Youtube History'
    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/history|:ythistory'
3082
3083
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch watch/attribution URLs lacking a video id and explain the likely cause.

    Never extracts anything: matching URLs always end in an expected
    ExtractorError.
    """
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError suggesting to quote the URL."""
        quoting_hint = (
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like  youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply  youtube-dl BaW_jenozKc  .')
        raise ExtractorError(quoting_hint, expected=True)
3131
3132
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch watch URLs whose video id has only 1 to 10 characters.

    Never extracts anything: matching URLs always end in an expected
    ExtractorError.
    """
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the incomplete id."""
        truncated_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (truncated_id, url)
        raise ExtractorError(message, expected=True)