_ Git - youtube-dl/blob - youtube_dl/extractor/youtube.py

   1 # coding: utf-8
   2
   3 from __future__ import unicode_literals
   4
   5
   6 import itertools
   7 import json
   8 import os.path
   9 import random
  10 import re
  11 import time
  12 import traceback
  13
  14 from .common import InfoExtractor, SearchInfoExtractor
  15 from ..jsinterp import JSInterpreter
  16 from ..swfinterp import SWFInterpreter
  17 from ..compat import (
  18     compat_chr,
  19     compat_HTTPError,
  20     compat_kwargs,
  21     compat_parse_qs,
  22     compat_urllib_parse_unquote,
  23     compat_urllib_parse_unquote_plus,
  24     compat_urllib_parse_urlencode,
  25     compat_urllib_parse_urlparse,
  26     compat_urlparse,
  27     compat_str,
  28 )
  29 from ..utils import (
  30     bool_or_none,
  31     clean_html,
  32     error_to_compat_str,
  33     extract_attributes,
  34     ExtractorError,
  35     float_or_none,
  36     get_element_by_attribute,
  37     get_element_by_id,
  38     int_or_none,
  39     mimetype2ext,
  40     orderedSet,
  41     parse_codecs,
  42     parse_duration,
  43     remove_quotes,
  44     remove_start,
  45     smuggle_url,
  46     str_or_none,
  47     str_to_int,
  48     try_get,
  49     unescapeHTML,
  50     unified_strdate,
  51     unsmuggle_url,
  52     uppercase_escape,
  53     url_or_none,
  54     urlencode_postdata,
  55 )
  56
  57
  58 class YoutubeBaseInfoExtractor(InfoExtractor):
  59     """Provide base functions for Youtube extractors"""
  60     _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
  61     _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'
  62
  63     _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
  64     _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
  65     _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'
  66
  67     _NETRC_MACHINE = 'youtube'
  68     # If True it will raise an error if no login info is provided
  69     _LOGIN_REQUIRED = False
  70
  71     _PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}'
  72
  73     _YOUTUBE_CLIENT_HEADERS = {
  74         'x-youtube-client-name': '1',
  75         'x-youtube-client-version': '1.20200609.04.02',
  76     }
  77
  78     def _set_language(self):
  79         self._set_cookie(
  80             '.youtube.com', 'PREF', 'f1=50000000&f6=8&hl=en',
  81             # YouTube sets the expire time to about two months
  82             expire_time=time.time() + 2 * 30 * 24 * 3600)
  83
  84     def _ids_to_results(self, ids):
  85         return [
  86             self.url_result(vid_id, 'Youtube', video_id=vid_id)
  87             for vid_id in ids]
  88
  89     def _login(self):
  90         """
  91         Attempt to log in to YouTube.
  92         True is returned if successful or skipped.
  93         False is returned if login failed.
  94
  95         If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
  96         """
  97         username, password = self._get_login_info()
  98         # No authentication to be performed
  99         if username is None:
 100             if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
 101                 raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
 102             return True
 103
 104         login_page = self._download_webpage(
 105             self._LOGIN_URL, None,
 106             note='Downloading login page',
 107             errnote='unable to fetch login page', fatal=False)
 108         if login_page is False:
 109             return
 110
 111         login_form = self._hidden_inputs(login_page)
 112
 113         def req(url, f_req, note, errnote):
 114             data = login_form.copy()
 115             data.update({
 116                 'pstMsg': 1,
 117                 'checkConnection': 'youtube',
 118                 'checkedDomains': 'youtube',
 119                 'hl': 'en',
 120                 'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
 121                 'f.req': json.dumps(f_req),
 122                 'flowName': 'GlifWebSignIn',
 123                 'flowEntry': 'ServiceLogin',
 124                 # TODO: reverse actual botguard identifier generation algo
 125                 'bgRequest': '["identifier",""]',
 126             })
 127             return self._download_json(
 128                 url, None, note=note, errnote=errnote,
 129                 transform_source=lambda s: re.sub(r'^[^[]*', '', s),
 130                 fatal=False,
 131                 data=urlencode_postdata(data), headers={
 132                     'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
 133                     'Google-Accounts-XSRF': 1,
 134                 })
 135
 136         def warn(message):
 137             self._downloader.report_warning(message)
 138
 139         lookup_req = [
 140             username,
 141             None, [], None, 'US', None, None, 2, False, True,
 142             [
 143                 None, None,
 144                 [2, 1, None, 1,
 145                  'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
 146                  None, [], 4],
 147                 1, [None, None, []], None, None, None, True
 148             ],
 149             username,
 150         ]
 151
 152         lookup_results = req(
 153             self._LOOKUP_URL, lookup_req,
 154             'Looking up account info', 'Unable to look up account info')
 155
 156         if lookup_results is False:
 157             return False
 158
 159         user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
 160         if not user_hash:
 161             warn('Unable to extract user hash')
 162             return False
 163
 164         challenge_req = [
 165             user_hash,
 166             None, 1, None, [1, None, None, None, [password, None, True]],
 167             [
 168                 None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
 169                 1, [None, None, []], None, None, None, True
 170             ]]
 171
 172         challenge_results = req(
 173             self._CHALLENGE_URL, challenge_req,
 174             'Logging in', 'Unable to log in')
 175
 176         if challenge_results is False:
 177             return
 178
 179         login_res = try_get(challenge_results, lambda x: x[0][5], list)
 180         if login_res:
 181             login_msg = try_get(login_res, lambda x: x[5], compat_str)
 182             warn(
 183                 'Unable to login: %s' % 'Invalid password'
 184                 if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
 185             return False
 186
 187         res = try_get(challenge_results, lambda x: x[0][-1], list)
 188         if not res:
 189             warn('Unable to extract result entry')
 190             return False
 191
 192         login_challenge = try_get(res, lambda x: x[0][0], list)
 193         if login_challenge:
 194             challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
 195             if challenge_str == 'TWO_STEP_VERIFICATION':
 196                 # SEND_SUCCESS - TFA code has been successfully sent to phone
 197                 # QUOTA_EXCEEDED - reached the limit of TFA codes
 198                 status = try_get(login_challenge, lambda x: x[5], compat_str)
 199                 if status == 'QUOTA_EXCEEDED':
 200                     warn('Exceeded the limit of TFA codes, try later')
 201                     return False
 202
 203                 tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
 204                 if not tl:
 205                     warn('Unable to extract TL')
 206                     return False
 207
 208                 tfa_code = self._get_tfa_info('2-step verification code')
 209
 210                 if not tfa_code:
 211                     warn(
 212                         'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
 213                         '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
 214                     return False
 215
 216                 tfa_code = remove_start(tfa_code, 'G-')
 217
 218                 tfa_req = [
 219                     user_hash, None, 2, None,
 220                     [
 221                         9, None, None, None, None, None, None, None,
 222                         [None, tfa_code, True, 2]
 223                     ]]
 224
 225                 tfa_results = req(
 226                     self._TFA_URL.format(tl), tfa_req,
 227                     'Submitting TFA code', 'Unable to submit TFA code')
 228
 229                 if tfa_results is False:
 230                     return False
 231
 232                 tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
 233                 if tfa_res:
 234                     tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
 235                     warn(
 236                         'Unable to finish TFA: %s' % 'Invalid TFA code'
 237                         if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
 238                     return False
 239
 240                 check_cookie_url = try_get(
 241                     tfa_results, lambda x: x[0][-1][2], compat_str)
 242             else:
 243                 CHALLENGES = {
 244                     'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
 245                     'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
 246                     'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
 247                 }
 248                 challenge = CHALLENGES.get(
 249                     challenge_str,
 250                     '%s returned error %s.' % (self.IE_NAME, challenge_str))
 251                 warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
 252                 return False
 253         else:
 254             check_cookie_url = try_get(res, lambda x: x[2], compat_str)
 255
 256         if not check_cookie_url:
 257             warn('Unable to extract CheckCookie URL')
 258             return False
 259
 260         check_cookie_results = self._download_webpage(
 261             check_cookie_url, None, 'Checking cookie', fatal=False)
 262
 263         if check_cookie_results is False:
 264             return False
 265
 266         if 'https://myaccount.google.com/' not in check_cookie_results:
 267             warn('Unable to log in')
 268             return False
 269
 270         return True
 271
 272     def _download_webpage_handle(self, *args, **kwargs):
 273         query = kwargs.get('query', {}).copy()
 274         query['disable_polymer'] = 'true'
 275         kwargs['query'] = query
 276         return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
 277             *args, **compat_kwargs(kwargs))
 278
 279     def _real_initialize(self):
 280         if self._downloader is None:
 281             return
 282         self._set_language()
 283         if not self._login():
 284             return
 285
 286
 287 class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
 288     # Extract entries from page with "Load more" button
 289     def _entries(self, page, playlist_id):
 290         more_widget_html = content_html = page
 291         for page_num in itertools.count(1):
 292             for entry in self._process_page(content_html):
 293                 yield entry
 294
 295             mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
 296             if not mobj:
 297                 break
 298
 299             count = 0
 300             retries = 3
 301             while count <= retries:
 302                 try:
 303                     # Downloading page may result in intermittent 5xx HTTP error
 304                     # that is usually worked around with a retry
 305                     more = self._download_json(
 306                         'https://www.youtube.com/%s' % mobj.group('more'), playlist_id,
 307                         'Downloading page #%s%s'
 308                         % (page_num, ' (retry #%d)' % count if count else ''),
 309                         transform_source=uppercase_escape,
 310                         headers=self._YOUTUBE_CLIENT_HEADERS)
 311                     break
 312                 except ExtractorError as e:
 313                     if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503):
 314                         count += 1
 315                         if count <= retries:
 316                             continue
 317                     raise
 318
 319             content_html = more['content_html']
 320             if not content_html.strip():
 321                 # Some webpages show a "Load more" button but they don't
 322                 # have more videos
 323                 break
 324             more_widget_html = more['load_more_widget_html']
 325
 326
 327 class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
 328     def _process_page(self, content):
 329         for video_id, video_title in self.extract_videos_from_page(content):
 330             yield self.url_result(video_id, 'Youtube', video_id, video_title)
 331
 332     def extract_videos_from_page_impl(self, video_re, page, ids_in_page, titles_in_page):
 333         for mobj in re.finditer(video_re, page):
 334             # The link with index 0 is not the first video of the playlist (not sure if still actual)
 335             if 'index' in mobj.groupdict() and mobj.group('id') == '0':
 336                 continue
 337             video_id = mobj.group('id')
 338             video_title = unescapeHTML(
 339                 mobj.group('title')) if 'title' in mobj.groupdict() else None
 340             if video_title:
 341                 video_title = video_title.strip()
 342             if video_title == '► Play all':
 343                 video_title = None
 344             try:
 345                 idx = ids_in_page.index(video_id)
 346                 if video_title and not titles_in_page[idx]:
 347                     titles_in_page[idx] = video_title
 348             except ValueError:
 349                 ids_in_page.append(video_id)
 350                 titles_in_page.append(video_title)
 351
 352     def extract_videos_from_page(self, page):
 353         ids_in_page = []
 354         titles_in_page = []
 355         self.extract_videos_from_page_impl(
 356             self._VIDEO_RE, page, ids_in_page, titles_in_page)
 357         return zip(ids_in_page, titles_in_page)
 358
 359
 360 class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
 361     def _process_page(self, content):
 362         for playlist_id in orderedSet(re.findall(
 363                 r'<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*><a[^>]+href="/?playlist\?list=([0-9A-Za-z-_]{10,})"',
 364                 content)):
 365             yield self.url_result(
 366                 'https://www.youtube.com/playlist?list=%s' % playlist_id, 'YoutubePlaylist')
 367
 368     def _real_extract(self, url):
 369         playlist_id = self._match_id(url)
 370         webpage = self._download_webpage(url, playlist_id)
 371         title = self._og_search_title(webpage, fatal=False)
 372         return self.playlist_result(self._entries(webpage, playlist_id), playlist_id, title)
 373
 374
 375 class YoutubeIE(YoutubeBaseInfoExtractor):
 376     IE_DESC = 'YouTube.com'
 377     _VALID_URL = r"""(?x)^
 378                      (
 379                          (?:https?://|//)                                    # http(s):// or protocol-independent URL
 380                          (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com/|
 381                             (?:www\.)?deturl\.com/www\.youtube\.com/|
 382                             (?:www\.)?pwnyoutube\.com/|
 383                             (?:www\.)?hooktube\.com/|
 384                             (?:www\.)?yourepeat\.com/|
 385                             tube\.majestyc\.net/|
 386                             # Invidious instances taken from https://github.com/omarroth/invidious/wiki/Invidious-Instances
 387                             (?:(?:www|dev)\.)?invidio\.us/|
 388                             (?:(?:www|no)\.)?invidiou\.sh/|
 389                             (?:(?:www|fi|de)\.)?invidious\.snopyta\.org/|
 390                             (?:www\.)?invidious\.kabi\.tk/|
 391                             (?:www\.)?invidious\.13ad\.de/|
 392                             (?:www\.)?invidious\.mastodon\.host/|
 393                             (?:www\.)?invidious\.nixnet\.xyz/|
 394                             (?:www\.)?invidious\.drycat\.fr/|
 395                             (?:www\.)?tube\.poal\.co/|
 396                             (?:www\.)?vid\.wxzm\.sx/|
 397                             (?:www\.)?yewtu\.be/|
 398                             (?:www\.)?yt\.elukerio\.org/|
 399                             (?:www\.)?yt\.lelux\.fi/|
 400                             (?:www\.)?invidious\.ggc-project\.de/|
 401                             (?:www\.)?yt\.maisputain\.ovh/|
 402                             (?:www\.)?invidious\.13ad\.de/|
 403                             (?:www\.)?invidious\.toot\.koeln/|
 404                             (?:www\.)?invidious\.fdn\.fr/|
 405                             (?:www\.)?watch\.nettohikari\.com/|
 406                             (?:www\.)?kgg2m7yk5aybusll\.onion/|
 407                             (?:www\.)?qklhadlycap4cnod\.onion/|
 408                             (?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion/|
 409                             (?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion/|
 410                             (?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion/|
 411                             (?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion/|
 412                             (?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p/|
 413                             (?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion/|
 414                             youtube\.googleapis\.com/)                        # the various hostnames, with wildcard subdomains
 415                          (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
 416                          (?:                                                  # the various things that can precede the ID:
 417                              (?:(?:v|embed|e)/(?!videoseries))                # v/ or embed/ or e/
 418                              |(?:                                             # or the v= param in all its forms
 419                                  (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)?  # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
 420                                  (?:\?|\#!?)                                  # the params delimiter ? or # or #!
 421                                  (?:.*?[&;])??                                # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
 422                                  v=
 423                              )
 424                          ))
 425                          |(?:
 426                             youtu\.be|                                        # just youtu.be/xxxx
 427                             vid\.plus|                                        # or vid.plus/xxxx
 428                             zwearz\.com/watch|                                # or zwearz.com/watch/xxxx
 429                          )/
 430                          |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
 431                          )
 432                      )?                                                       # all until now is optional -> you can pass the naked ID
 433                      ([0-9A-Za-z_-]{11})                                      # here is it! the YouTube video ID
 434                      (?!.*?\blist=
 435                         (?:
 436                             %(playlist_id)s|                                  # combined list/video URLs are handled by the playlist IE
 437                             WL                                                # WL are handled by the watch later IE
 438                         )
 439                      )
 440                      (?(1).+)?                                                # if we found the ID, everything can follow
 441                      $""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
 442     _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
 443     _PLAYER_INFO_RE = (
 444         r'/(?P<id>[a-zA-Z0-9_-]{8,})/player_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?/base\.(?P<ext>[a-z]+)$',
 445         r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.(?P<ext>[a-z]+)$',
 446     )
 447     _formats = {
 448         '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
 449         '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
 450         '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
 451         '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
 452         '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
 453         '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
 454         '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 455         '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 456         # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
 457         '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
 458         '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
 459         '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
 460         '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
 461         '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
 462         '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
 463         '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
 464         '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 465         '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 466
 467
 468         # 3D videos
 469         '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
 470         '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
 471         '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
 472         '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
 473         '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
 474         '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
 475         '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
 476
 477         # Apple HTTP Live Streaming
 478         '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
 479         '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
 480         '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
 481         '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
 482         '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
 483         '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
 484         '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
 485         '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
 486
 487         # DASH mp4 video
 488         '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
 489         '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
 490         '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
 491         '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
 492         '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
 493         '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'},  # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
 494         '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
 495         '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
 496         '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
 497         '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
 498         '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
 499         '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
 500
 501         # Dash mp4 audio
 502         '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
 503         '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
 504         '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
 505         '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
 506         '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
 507         '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
 508         '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
 509
 510         # Dash webm
 511         '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 512         '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 513         '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 514         '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 515         '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 516         '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 517         '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
 518         '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 519         '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 520         '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 521         '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 522         '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 523         '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 524         '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 525         '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 526         # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
 527         '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 528         '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
 529         '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
 530         '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
 531         '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 532         '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
 533
 534         # Dash webm audio
 535         '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
 536         '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
 537
 538         # Dash webm audio with opus inside
 539         '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
 540         '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
 541         '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
 542
 543         # RTMP (unnamed)
 544         '_rtmp': {'protocol': 'rtmp'},
 545
 546         # av01 video only formats sometimes served with "unknown" codecs
 547         '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
 548         '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
 549         '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
 550         '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
 551     }
 552     _SUBTITLE_FORMATS = ('srv1', 'srv2', 'srv3', 'ttml', 'vtt')
 553
 554     _GEO_BYPASS = False
 555
 556     IE_NAME = 'youtube'
 557     _TESTS = [
 558         {
 559             'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
 560             'info_dict': {
 561                 'id': 'BaW_jenozKc',
 562                 'ext': 'mp4',
 563                 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
 564                 'uploader': 'Philipp Hagemeister',
 565                 'uploader_id': 'phihag',
 566                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
 567                 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
 568                 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
 569                 'upload_date': '20121002',
 570                 'description': 'test chars:  "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
 571                 'categories': ['Science & Technology'],
 572                 'tags': ['youtube-dl'],
 573                 'duration': 10,
 574                 'view_count': int,
 575                 'like_count': int,
 576                 'dislike_count': int,
 577                 'start_time': 1,
 578                 'end_time': 9,
 579             }
 580         },
 581         {
 582             'url': 'https://www.youtube.com/watch?v=UxxajLWwzqY',
 583             'note': 'Test generic use_cipher_signature video (#897)',
 584             'info_dict': {
 585                 'id': 'UxxajLWwzqY',
 586                 'ext': 'mp4',
 587                 'upload_date': '20120506',
 588                 'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
 589                 'alt_title': 'I Love It (feat. Charli XCX)',
 590                 'description': 'md5:19a2f98d9032b9311e686ed039564f63',
 591                 'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
 592                          'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
 593                          'iconic ep', 'iconic', 'love', 'it'],
 594                 'duration': 180,
 595                 'uploader': 'Icona Pop',
 596                 'uploader_id': 'IconaPop',
 597                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IconaPop',
 598                 'creator': 'Icona Pop',
 599                 'track': 'I Love It (feat. Charli XCX)',
 600                 'artist': 'Icona Pop',
 601             }
 602         },
 603         {
 604             'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ',
 605             'note': 'Test VEVO video with age protection (#956)',
 606             'info_dict': {
 607                 'id': '07FYdnEawAQ',
 608                 'ext': 'mp4',
 609                 'upload_date': '20130703',
 610                 'title': 'Justin Timberlake - Tunnel Vision (Official Music Video) (Explicit)',
 611                 'alt_title': 'Tunnel Vision',
 612                 'description': 'md5:07dab3356cde4199048e4c7cd93471e1',
 613                 'duration': 419,
 614                 'uploader': 'justintimberlakeVEVO',
 615                 'uploader_id': 'justintimberlakeVEVO',
 616                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/justintimberlakeVEVO',
 617                 'creator': 'Justin Timberlake',
 618                 'track': 'Tunnel Vision',
 619                 'artist': 'Justin Timberlake',
 620                 'age_limit': 18,
 621             }
 622         },
 623         {
 624             'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
 625             'note': 'Embed-only video (#1746)',
 626             'info_dict': {
 627                 'id': 'yZIXLfi8CZQ',
 628                 'ext': 'mp4',
 629                 'upload_date': '20120608',
 630                 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
 631                 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
 632                 'uploader': 'SET India',
 633                 'uploader_id': 'setindia',
 634                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
 635                 'age_limit': 18,
 636             }
 637         },
 638         {
 639             'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=UxxajLWwzqY',
 640             'note': 'Use the first video ID in the URL',
 641             'info_dict': {
 642                 'id': 'BaW_jenozKc',
 643                 'ext': 'mp4',
 644                 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
 645                 'uploader': 'Philipp Hagemeister',
 646                 'uploader_id': 'phihag',
 647                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
 648                 'upload_date': '20121002',
 649                 'description': 'test chars:  "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
 650                 'categories': ['Science & Technology'],
 651                 'tags': ['youtube-dl'],
 652                 'duration': 10,
 653                 'view_count': int,
 654                 'like_count': int,
 655                 'dislike_count': int,
 656             },
 657             'params': {
 658                 'skip_download': True,
 659             },
 660         },
 661         {
 662             'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
 663             'note': '256k DASH audio (format 141) via DASH manifest',
 664             'info_dict': {
 665                 'id': 'a9LDPn-MO4I',
 666                 'ext': 'm4a',
 667                 'upload_date': '20121002',
 668                 'uploader_id': '8KVIDEO',
 669                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
 670                 'description': '',
 671                 'uploader': '8KVIDEO',
 672                 'title': 'UHDTV TEST 8K VIDEO.mp4'
 673             },
 674             'params': {
 675                 'youtube_include_dash_manifest': True,
 676                 'format': '141',
 677             },
 678             'skip': 'format 141 not served anymore',
 679         },
 680         # DASH manifest with encrypted signature
 681         {
 682             'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
 683             'info_dict': {
 684                 'id': 'IB3lcPjvWLA',
 685                 'ext': 'm4a',
 686                 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
 687                 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
 688                 'duration': 244,
 689                 'uploader': 'AfrojackVEVO',
 690                 'uploader_id': 'AfrojackVEVO',
 691                 'upload_date': '20131011',
 692             },
 693             'params': {
 694                 'youtube_include_dash_manifest': True,
 695                 'format': '141/bestaudio[ext=m4a]',
 696             },
 697         },
 698         # JS player signature function name containing $
 699         {
 700             'url': 'https://www.youtube.com/watch?v=nfWlot6h_JM',
 701             'info_dict': {
 702                 'id': 'nfWlot6h_JM',
 703                 'ext': 'm4a',
 704                 'title': 'Taylor Swift - Shake It Off',
 705                 'description': 'md5:307195cd21ff7fa352270fe884570ef0',
 706                 'duration': 242,
 707                 'uploader': 'TaylorSwiftVEVO',
 708                 'uploader_id': 'TaylorSwiftVEVO',
 709                 'upload_date': '20140818',
 710             },
 711             'params': {
 712                 'youtube_include_dash_manifest': True,
 713                 'format': '141/bestaudio[ext=m4a]',
 714             },
 715         },
 716         # Controversy video
 717         {
 718             'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
 719             'info_dict': {
 720                 'id': 'T4XJQO3qol8',
 721                 'ext': 'mp4',
 722                 'duration': 219,
 723                 'upload_date': '20100909',
 724                 'uploader': 'Amazing Atheist',
 725                 'uploader_id': 'TheAmazingAtheist',
 726                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
 727                 'title': 'Burning Everyone\'s Koran',
 728                 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
 729             }
 730         },
 731         # Normal age-gate video (No vevo, embed allowed)
 732         {
 733             'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
 734             'info_dict': {
 735                 'id': 'HtVdAasjOgU',
 736                 'ext': 'mp4',
 737                 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
 738                 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
 739                 'duration': 142,
 740                 'uploader': 'The Witcher',
 741                 'uploader_id': 'WitcherGame',
 742                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
 743                 'upload_date': '20140605',
 744                 'age_limit': 18,
 745             },
 746         },
 747         # Age-gate video with encrypted signature
 748         {
 749             'url': 'https://www.youtube.com/watch?v=6kLq3WMV1nU',
 750             'info_dict': {
 751                 'id': '6kLq3WMV1nU',
 752                 'ext': 'mp4',
 753                 'title': 'Dedication To My Ex (Miss That) (Lyric Video)',
 754                 'description': 'md5:33765bb339e1b47e7e72b5490139bb41',
 755                 'duration': 246,
 756                 'uploader': 'LloydVEVO',
 757                 'uploader_id': 'LloydVEVO',
 758                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/LloydVEVO',
 759                 'upload_date': '20110629',
 760                 'age_limit': 18,
 761             },
 762         },
 763         # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
 764         # YouTube Red ad is not captured for creator
 765         {
 766             'url': '__2ABJjxzNo',
 767             'info_dict': {
 768                 'id': '__2ABJjxzNo',
 769                 'ext': 'mp4',
 770                 'duration': 266,
 771                 'upload_date': '20100430',
 772                 'uploader_id': 'deadmau5',
 773                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
 774                 'creator': 'Dada Life, deadmau5',
 775                 'description': 'md5:12c56784b8032162bb936a5f76d55360',
 776                 'uploader': 'deadmau5',
 777                 'title': 'Deadmau5 - Some Chords (HD)',
 778                 'alt_title': 'This Machine Kills Some Chords',
 779             },
 780             'expected_warnings': [
 781                 'DASH manifest missing',
 782             ]
 783         },
 784         # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
 785         {
 786             'url': 'lqQg6PlCWgI',
 787             'info_dict': {
 788                 'id': 'lqQg6PlCWgI',
 789                 'ext': 'mp4',
 790                 'duration': 6085,
 791                 'upload_date': '20150827',
 792                 'uploader_id': 'olympic',
 793                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
 794                 'description': 'HO09  - Women -  GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
 795                 'uploader': 'Olympic',
 796                 'title': 'Hockey - Women -  GER-AUS - London 2012 Olympic Games',
 797             },
 798             'params': {
 799                 'skip_download': 'requires avconv',
 800             }
 801         },
 802         # Non-square pixels
 803         {
 804             'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
 805             'info_dict': {
 806                 'id': '_b-2C3KPAM0',
 807                 'ext': 'mp4',
 808                 'stretched_ratio': 16 / 9.,
 809                 'duration': 85,
 810                 'upload_date': '20110310',
 811                 'uploader_id': 'AllenMeow',
 812                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
 813                 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
 814                 'uploader': '孫ᄋᄅ',
 815                 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
 816             },
 817         },
 818         # url_encoded_fmt_stream_map is empty string
 819         {
 820             'url': 'qEJwOuvDf7I',
 821             'info_dict': {
 822                 'id': 'qEJwOuvDf7I',
 823                 'ext': 'webm',
 824                 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
 825                 'description': '',
 826                 'upload_date': '20150404',
 827                 'uploader_id': 'spbelect',
 828                 'uploader': 'Наблюдатели Петербурга',
 829             },
 830             'params': {
 831                 'skip_download': 'requires avconv',
 832             },
 833             'skip': 'This live event has ended.',
 834         },
 835         # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
 836         {
 837             'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
 838             'info_dict': {
 839                 'id': 'FIl7x6_3R5Y',
 840                 'ext': 'webm',
 841                 'title': 'md5:7b81415841e02ecd4313668cde88737a',
 842                 'description': 'md5:116377fd2963b81ec4ce64b542173306',
 843                 'duration': 220,
 844                 'upload_date': '20150625',
 845                 'uploader_id': 'dorappi2000',
 846                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
 847                 'uploader': 'dorappi2000',
 848                 'formats': 'mincount:31',
 849             },
 850             'skip': 'not actual anymore',
 851         },
 852         # DASH manifest with segment_list
 853         {
 854             'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
 855             'md5': '8ce563a1d667b599d21064e982ab9e31',
 856             'info_dict': {
 857                 'id': 'CsmdDsKjzN8',
 858                 'ext': 'mp4',
 859                 'upload_date': '20150501',  # According to '<meta itemprop="datePublished"', but in other places it's 20150510
 860                 'uploader': 'Airtek',
 861                 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
 862                 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
 863                 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
 864             },
 865             'params': {
 866                 'youtube_include_dash_manifest': True,
 867                 'format': '135',  # bestvideo
 868             },
 869             'skip': 'This live event has ended.',
 870         },
 871         {
 872             # Multifeed videos (multiple cameras), URL is for Main Camera
 873             'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
 874             'info_dict': {
 875                 'id': 'jqWvoWXjCVs',
 876                 'title': 'teamPGP: Rocket League Noob Stream',
 877                 'description': 'md5:dc7872fb300e143831327f1bae3af010',
 878             },
 879             'playlist': [{
 880                 'info_dict': {
 881                     'id': 'jqWvoWXjCVs',
 882                     'ext': 'mp4',
 883                     'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
 884                     'description': 'md5:dc7872fb300e143831327f1bae3af010',
 885                     'duration': 7335,
 886                     'upload_date': '20150721',
 887                     'uploader': 'Beer Games Beer',
 888                     'uploader_id': 'beergamesbeer',
 889                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
 890                     'license': 'Standard YouTube License',
 891                 },
 892             }, {
 893                 'info_dict': {
 894                     'id': '6h8e8xoXJzg',
 895                     'ext': 'mp4',
 896                     'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
 897                     'description': 'md5:dc7872fb300e143831327f1bae3af010',
 898                     'duration': 7337,
 899                     'upload_date': '20150721',
 900                     'uploader': 'Beer Games Beer',
 901                     'uploader_id': 'beergamesbeer',
 902                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
 903                     'license': 'Standard YouTube License',
 904                 },
 905             }, {
 906                 'info_dict': {
 907                     'id': 'PUOgX5z9xZw',
 908                     'ext': 'mp4',
 909                     'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
 910                     'description': 'md5:dc7872fb300e143831327f1bae3af010',
 911                     'duration': 7337,
 912                     'upload_date': '20150721',
 913                     'uploader': 'Beer Games Beer',
 914                     'uploader_id': 'beergamesbeer',
 915                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
 916                     'license': 'Standard YouTube License',
 917                 },
 918             }, {
 919                 'info_dict': {
 920                     'id': 'teuwxikvS5k',
 921                     'ext': 'mp4',
 922                     'title': 'teamPGP: Rocket League Noob Stream (zim)',
 923                     'description': 'md5:dc7872fb300e143831327f1bae3af010',
 924                     'duration': 7334,
 925                     'upload_date': '20150721',
 926                     'uploader': 'Beer Games Beer',
 927                     'uploader_id': 'beergamesbeer',
 928                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
 929                     'license': 'Standard YouTube License',
 930                 },
 931             }],
 932             'params': {
 933                 'skip_download': True,
 934             },
 935             'skip': 'This video is not available.',
 936         },
 937         {
 938             # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
 939             'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
 940             'info_dict': {
 941                 'id': 'gVfLd0zydlo',
 942                 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
 943             },
 944             'playlist_count': 2,
 945             'skip': 'Not multifeed anymore',
 946         },
 947         {
 948             'url': 'https://vid.plus/FlRa-iH7PGw',
 949             'only_matching': True,
 950         },
 951         {
 952             'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
 953             'only_matching': True,
 954         },
 955         {
 956             # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
 957             # Also tests cut-off URL expansion in video description (see
 958             # https://github.com/ytdl-org/youtube-dl/issues/1892,
 959             # https://github.com/ytdl-org/youtube-dl/issues/8164)
 960             'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
 961             'info_dict': {
 962                 'id': 'lsguqyKfVQg',
 963                 'ext': 'mp4',
 964                 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
 965                 'alt_title': 'Dark Walk - Position Music',
 966                 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
 967                 'duration': 133,
 968                 'upload_date': '20151119',
 969                 'uploader_id': 'IronSoulElf',
 970                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
 971                 'uploader': 'IronSoulElf',
 972                 'creator': 'Todd Haberman,  Daniel Law Heath and Aaron Kaplan',
 973                 'track': 'Dark Walk - Position Music',
 974                 'artist': 'Todd Haberman,  Daniel Law Heath and Aaron Kaplan',
 975                 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
 976             },
 977             'params': {
 978                 'skip_download': True,
 979             },
 980         },
 981         {
 982             # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
 983             'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
 984             'only_matching': True,
 985         },
 986         {
 987             # Video with yt:stretch=17:0
 988             'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
 989             'info_dict': {
 990                 'id': 'Q39EVAstoRM',
 991                 'ext': 'mp4',
 992                 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
 993                 'description': 'md5:ee18a25c350637c8faff806845bddee9',
 994                 'upload_date': '20151107',
 995                 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
 996                 'uploader': 'CH GAMER DROID',
 997             },
 998             'params': {
 999                 'skip_download': True,
1000             },
1001             'skip': 'This video does not exist.',
1002         },
1003         {
1004             # Video licensed under Creative Commons
1005             'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1006             'info_dict': {
1007                 'id': 'M4gD1WSo5mA',
1008                 'ext': 'mp4',
1009                 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1010                 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
1011                 'duration': 721,
1012                 'upload_date': '20150127',
1013                 'uploader_id': 'BerkmanCenter',
1014                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
1015                 'uploader': 'The Berkman Klein Center for Internet & Society',
1016                 'license': 'Creative Commons Attribution license (reuse allowed)',
1017             },
1018             'params': {
1019                 'skip_download': True,
1020             },
1021         },
1022         {
1023             # Channel-like uploader_url
1024             'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1025             'info_dict': {
1026                 'id': 'eQcmzGIKrzg',
1027                 'ext': 'mp4',
1028                 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
1029                 'description': 'md5:dda0d780d5a6e120758d1711d062a867',
1030                 'duration': 4060,
1031                 'upload_date': '20151119',
1032                 'uploader': 'Bernie Sanders',
1033                 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1034                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
1035                 'license': 'Creative Commons Attribution license (reuse allowed)',
1036             },
1037             'params': {
1038                 'skip_download': True,
1039             },
1040         },
1041         {
1042             'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1043             'only_matching': True,
1044         },
1045         {
1046             # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
1047             'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1048             'only_matching': True,
1049         },
1050         {
1051             # Rental video preview
1052             'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1053             'info_dict': {
1054                 'id': 'uGpuVWrhIzE',
1055                 'ext': 'mp4',
1056                 'title': 'Piku - Trailer',
1057                 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1058                 'upload_date': '20150811',
1059                 'uploader': 'FlixMatrix',
1060                 'uploader_id': 'FlixMatrixKaravan',
1061                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
1062                 'license': 'Standard YouTube License',
1063             },
1064             'params': {
1065                 'skip_download': True,
1066             },
1067             'skip': 'This video is not available.',
1068         },
1069         {
1070             # YouTube Red video with episode data
1071             'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1072             'info_dict': {
1073                 'id': 'iqKdEhx-dD4',
1074                 'ext': 'mp4',
1075                 'title': 'Isolation - Mind Field (Ep 1)',
1076                 'description': 'md5:46a29be4ceffa65b92d277b93f463c0f',
1077                 'duration': 2085,
1078                 'upload_date': '20170118',
1079                 'uploader': 'Vsauce',
1080                 'uploader_id': 'Vsauce',
1081                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
1082                 'series': 'Mind Field',
1083                 'season_number': 1,
1084                 'episode_number': 1,
1085             },
1086             'params': {
1087                 'skip_download': True,
1088             },
1089             'expected_warnings': [
1090                 'Skipping DASH manifest',
1091             ],
1092         },
1093         {
1094             # The following content has been identified by the YouTube community
1095             # as inappropriate or offensive to some audiences.
1096             'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1097             'info_dict': {
1098                 'id': '6SJNVb0GnPI',
1099                 'ext': 'mp4',
1100                 'title': 'Race Differences in Intelligence',
1101                 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1102                 'duration': 965,
1103                 'upload_date': '20140124',
1104                 'uploader': 'New Century Foundation',
1105                 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1106                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1107             },
1108             'params': {
1109                 'skip_download': True,
1110             },
1111         },
1112         {
1113             # itag 212
1114             'url': '1t24XAntNCY',
1115             'only_matching': True,
1116         },
1117         {
1118             # geo restricted to JP
1119             'url': 'sJL6WA-aGkQ',
1120             'only_matching': True,
1121         },
1122         {
1123             'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
1124             'only_matching': True,
1125         },
1126         {
1127             'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1128             'only_matching': True,
1129         },
1130         {
1131             # DRM protected
1132             'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1133             'only_matching': True,
1134         },
1135         {
1136             # Video with unsupported adaptive stream type formats
1137             'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1138             'info_dict': {
1139                 'id': 'Z4Vy8R84T1U',
1140                 'ext': 'mp4',
1141                 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1142                 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1143                 'duration': 433,
1144                 'upload_date': '20130923',
1145                 'uploader': 'Amelia Putri Harwita',
1146                 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1147                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1148                 'formats': 'maxcount:10',
1149             },
1150             'params': {
1151                 'skip_download': True,
1152                 'youtube_include_dash_manifest': False,
1153             },
1154             'skip': 'not actual anymore',
1155         },
1156         {
1157             # Youtube Music Auto-generated description
1158             'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1159             'info_dict': {
1160                 'id': 'MgNrAu2pzNs',
1161                 'ext': 'mp4',
1162                 'title': 'Voyeur Girl',
1163                 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1164                 'upload_date': '20190312',
1165                 'uploader': 'Stephen - Topic',
1166                 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1167                 'artist': 'Stephen',
1168                 'track': 'Voyeur Girl',
1169                 'album': 'it\'s too much love to know my dear',
1170                 'release_date': '20190313',
1171                 'release_year': 2019,
1172             },
1173             'params': {
1174                 'skip_download': True,
1175             },
1176         },
1177         {
1178             # Youtube Music Auto-generated description
1179             # Retrieve 'artist' field from 'Artist:' in video description
1180             # when it is present on youtube music video
1181             'url': 'https://www.youtube.com/watch?v=k0jLE7tTwjY',
1182             'info_dict': {
1183                 'id': 'k0jLE7tTwjY',
1184                 'ext': 'mp4',
1185                 'title': 'Latch Feat. Sam Smith',
1186                 'description': 'md5:3cb1e8101a7c85fcba9b4fb41b951335',
1187                 'upload_date': '20150110',
1188                 'uploader': 'Various Artists - Topic',
1189                 'uploader_id': 'UCNkEcmYdjrH4RqtNgh7BZ9w',
1190                 'artist': 'Disclosure',
1191                 'track': 'Latch Feat. Sam Smith',
1192                 'album': 'Latch Featuring Sam Smith',
1193                 'release_date': '20121008',
1194                 'release_year': 2012,
1195             },
1196             'params': {
1197                 'skip_download': True,
1198             },
1199         },
1200         {
1201             # Youtube Music Auto-generated description
1202             # handle multiple artists on youtube music video
1203             'url': 'https://www.youtube.com/watch?v=74qn0eJSjpA',
1204             'info_dict': {
1205                 'id': '74qn0eJSjpA',
1206                 'ext': 'mp4',
1207                 'title': 'Eastside',
1208                 'description': 'md5:290516bb73dcbfab0dcc4efe6c3de5f2',
1209                 'upload_date': '20180710',
1210                 'uploader': 'Benny Blanco - Topic',
1211                 'uploader_id': 'UCzqz_ksRu_WkIzmivMdIS7A',
1212                 'artist': 'benny blanco, Halsey, Khalid',
1213                 'track': 'Eastside',
1214                 'album': 'Eastside',
1215                 'release_date': '20180713',
1216                 'release_year': 2018,
1217             },
1218             'params': {
1219                 'skip_download': True,
1220             },
1221         },
1222         {
1223             # Youtube Music Auto-generated description
1224             # handle youtube music video with release_year and no release_date
1225             'url': 'https://www.youtube.com/watch?v=-hcAI0g-f5M',
1226             'info_dict': {
1227                 'id': '-hcAI0g-f5M',
1228                 'ext': 'mp4',
1229                 'title': 'Put It On Me',
1230                 'description': 'md5:f6422397c07c4c907c6638e1fee380a5',
1231                 'upload_date': '20180426',
1232                 'uploader': 'Matt Maeson - Topic',
1233                 'uploader_id': 'UCnEkIGqtGcQMLk73Kp-Q5LQ',
1234                 'artist': 'Matt Maeson',
1235                 'track': 'Put It On Me',
1236                 'album': 'The Hearse',
1237                 'release_date': None,
1238                 'release_year': 2018,
1239             },
1240             'params': {
1241                 'skip_download': True,
1242             },
1243         },
1244         {
1245             'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1246             'only_matching': True,
1247         },
1248         {
1249             # invalid -> valid video id redirection
1250             'url': 'DJztXj2GPfl',
1251             'info_dict': {
1252                 'id': 'DJztXj2GPfk',
1253                 'ext': 'mp4',
1254                 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1255                 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1256                 'upload_date': '20090125',
1257                 'uploader': 'Prochorowka',
1258                 'uploader_id': 'Prochorowka',
1259                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1260                 'artist': 'Panjabi MC',
1261                 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1262                 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1263             },
1264             'params': {
1265                 'skip_download': True,
1266             },
1267         },
1268         {
1269             # empty description results in an empty string
1270             'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1271             'info_dict': {
1272                 'id': 'x41yOUIvK2k',
1273                 'ext': 'mp4',
1274                 'title': 'IMG 3456',
1275                 'description': '',
1276                 'upload_date': '20170613',
1277                 'uploader_id': 'ElevageOrVert',
1278                 'uploader': 'ElevageOrVert',
1279             },
1280             'params': {
1281                 'skip_download': True,
1282             },
1283         },
1284     ]
1285
    def __init__(self, *args, **kwargs):
        """Initialise the extractor and its per-instance player cache."""
        super(YoutubeIE, self).__init__(*args, **kwargs)
        # Maps (player_url, signature cache id) to the signature function
        # obtained for it; filled in by _decrypt_signature
        self._player_cache = {}
1289
1290     def report_video_info_webpage_download(self, video_id):
1291         """Report attempt to download video info webpage."""
1292         self.to_screen('%s: Downloading video info webpage' % video_id)
1293
1294     def report_information_extraction(self, video_id):
1295         """Report attempt to extract video information."""
1296         self.to_screen('%s: Extracting video information' % video_id)
1297
1298     def report_unavailable_format(self, video_id, format):
1299         """Report extracted video URL."""
1300         self.to_screen('%s: Format %s not available' % (video_id, format))
1301
1302     def report_rtmp_download(self):
1303         """Indicate the download will use the RTMP protocol."""
1304         self.to_screen('RTMP download detected')
1305
1306     def _signature_cache_id(self, example_sig):
1307         """ Return a string representation of a signature """
1308         return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
1309
1310     @classmethod
1311     def _extract_player_info(cls, player_url):
1312         for player_re in cls._PLAYER_INFO_RE:
1313             id_m = re.search(player_re, player_url)
1314             if id_m:
1315                 break
1316         else:
1317             raise ExtractorError('Cannot identify player %r' % player_url)
1318         return id_m.group('ext'), id_m.group('id')
1319
1320     def _extract_signature_function(self, video_id, player_url, example_sig):
1321         player_type, player_id = self._extract_player_info(player_url)
1322
1323         # Read from filesystem cache
1324         func_id = '%s_%s_%s' % (
1325             player_type, player_id, self._signature_cache_id(example_sig))
1326         assert os.path.basename(func_id) == func_id
1327
1328         cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
1329         if cache_spec is not None:
1330             return lambda s: ''.join(s[i] for i in cache_spec)
1331
1332         download_note = (
1333             'Downloading player %s' % player_url
1334             if self._downloader.params.get('verbose') else
1335             'Downloading %s player %s' % (player_type, player_id)
1336         )
1337         if player_type == 'js':
1338             code = self._download_webpage(
1339                 player_url, video_id,
1340                 note=download_note,
1341                 errnote='Download of %s failed' % player_url)
1342             res = self._parse_sig_js(code)
1343         elif player_type == 'swf':
1344             urlh = self._request_webpage(
1345                 player_url, video_id,
1346                 note=download_note,
1347                 errnote='Download of %s failed' % player_url)
1348             code = urlh.read()
1349             res = self._parse_sig_swf(code)
1350         else:
1351             assert False, 'Invalid player type %r' % player_type
1352
1353         test_string = ''.join(map(compat_chr, range(len(example_sig))))
1354         cache_res = res(test_string)
1355         cache_spec = [ord(c) for c in cache_res]
1356
1357         self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
1358         return res
1359
1360     def _print_sig_code(self, func, example_sig):
1361         def gen_sig_code(idxs):
1362             def _genslice(start, end, step):
1363                 starts = '' if start == 0 else str(start)
1364                 ends = (':%d' % (end + step)) if end + step >= 0 else ':'
1365                 steps = '' if step == 1 else (':%d' % step)
1366                 return 's[%s%s%s]' % (starts, ends, steps)
1367
1368             step = None
1369             # Quelch pyflakes warnings - start will be set when step is set
1370             start = '(Never used)'
1371             for i, prev in zip(idxs[1:], idxs[:-1]):
1372                 if step is not None:
1373                     if i - prev == step:
1374                         continue
1375                     yield _genslice(start, prev, step)
1376                     step = None
1377                     continue
1378                 if i - prev in [-1, 1]:
1379                     step = i - prev
1380                     start = prev
1381                     continue
1382                 else:
1383                     yield 's[%d]' % prev
1384             if step is None:
1385                 yield 's[%d]' % i
1386             else:
1387                 yield _genslice(start, i, step)
1388
1389         test_string = ''.join(map(compat_chr, range(len(example_sig))))
1390         cache_res = func(test_string)
1391         cache_spec = [ord(c) for c in cache_res]
1392         expr_code = ' + '.join(gen_sig_code(cache_spec))
1393         signature_id_tuple = '(%s)' % (
1394             ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
1395         code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
1396                 '    return %s\n') % (signature_id_tuple, expr_code)
1397         self.to_screen('Extracted signature function:\n' + code)
1398
1399     def _parse_sig_js(self, jscode):
1400         funcname = self._search_regex(
1401             (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1402              r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1403              r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
1404              r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
1405              # Obsolete patterns
1406              r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1407              r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
1408              r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1409              r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1410              r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1411              r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1412              r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1413              r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
1414             jscode, 'Initial JS player signature function name', group='sig')
1415
1416         jsi = JSInterpreter(jscode)
1417         initial_function = jsi.extract_function(funcname)
1418         return lambda s: initial_function([s])
1419
1420     def _parse_sig_swf(self, file_contents):
1421         swfi = SWFInterpreter(file_contents)
1422         TARGET_CLASSNAME = 'SignatureDecipher'
1423         searched_class = swfi.extract_class(TARGET_CLASSNAME)
1424         initial_function = swfi.extract_function(searched_class, 'decipher')
1425         return lambda s: initial_function([s])
1426
1427     def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
1428         """Turn the encrypted s field into a working signature"""
1429
1430         if player_url is None:
1431             raise ExtractorError('Cannot decrypt signature without player_url')
1432
1433         if player_url.startswith('//'):
1434             player_url = 'https:' + player_url
1435         elif not re.match(r'https?://', player_url):
1436             player_url = compat_urlparse.urljoin(
1437                 'https://www.youtube.com', player_url)
1438         try:
1439             player_id = (player_url, self._signature_cache_id(s))
1440             if player_id not in self._player_cache:
1441                 func = self._extract_signature_function(
1442                     video_id, player_url, s
1443                 )
1444                 self._player_cache[player_id] = func
1445             func = self._player_cache[player_id]
1446             if self._downloader.params.get('youtube_print_sig_code'):
1447                 self._print_sig_code(func, s)
1448             return func(s)
1449         except Exception as e:
1450             tb = traceback.format_exc()
1451             raise ExtractorError(
1452                 'Signature extraction failed: ' + tb, cause=e)
1453
1454     def _get_subtitles(self, video_id, webpage):
1455         try:
1456             subs_doc = self._download_xml(
1457                 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
1458                 video_id, note=False)
1459         except ExtractorError as err:
1460             self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err))
1461             return {}
1462
1463         sub_lang_list = {}
1464         for track in subs_doc.findall('track'):
1465             lang = track.attrib['lang_code']
1466             if lang in sub_lang_list:
1467                 continue
1468             sub_formats = []
1469             for ext in self._SUBTITLE_FORMATS:
1470                 params = compat_urllib_parse_urlencode({
1471                     'lang': lang,
1472                     'v': video_id,
1473                     'fmt': ext,
1474                     'name': track.attrib['name'].encode('utf-8'),
1475                 })
1476                 sub_formats.append({
1477                     'url': 'https://www.youtube.com/api/timedtext?' + params,
1478                     'ext': ext,
1479                 })
1480             sub_lang_list[lang] = sub_formats
1481         if not sub_lang_list:
1482             self._downloader.report_warning('video doesn\'t have subtitles')
1483             return {}
1484         return sub_lang_list
1485
1486     def _get_ytplayer_config(self, video_id, webpage):
1487         patterns = (
1488             # User data may contain arbitrary character sequences that may affect
1489             # JSON extraction with regex, e.g. when '};' is contained the second
1490             # regex won't capture the whole JSON. Yet working around by trying more
1491             # concrete regex first keeping in mind proper quoted string handling
1492             # to be implemented in future that will replace this workaround (see
1493             # https://github.com/ytdl-org/youtube-dl/issues/7468,
1494             # https://github.com/ytdl-org/youtube-dl/pull/7599)
1495             r';ytplayer\.config\s*=\s*({.+?});ytplayer',
1496             r';ytplayer\.config\s*=\s*({.+?});',
1497         )
1498         config = self._search_regex(
1499             patterns, webpage, 'ytplayer.config', default=None)
1500         if config:
1501             return self._parse_json(
1502                 uppercase_escape(config), video_id, fatal=False)
1503
    def _get_automatic_captions(self, video_id, webpage):
        """Return the automatic captions of the video.

        The webpage is needed to find the captions URL; it is passed in as an
        argument so that it does not have to be downloaded again.

        The result maps each target language code to a list of
        {'url', 'ext'} dicts, one per entry of self._SUBTITLE_FORMATS. Three
        sources in the player config args are tried in order: 'ttsurl',
        'player_response' and finally 'caption_tracks' together with
        'caption_translation_languages'. An empty dict is returned, after a
        warning, when no captions can be found.
        """
        self.to_screen('%s: Looking for automatic captions' % video_id)
        player_config = self._get_ytplayer_config(video_id, webpage)
        err_msg = 'Couldn\'t find automatic captions for %s' % video_id
        if not player_config:
            self._downloader.report_warning(err_msg)
            return {}
        try:
            args = player_config['args']
            caption_url = args.get('ttsurl')
            if caption_url:
                timestamp = args['timestamp']
                # Fetch the list of available subtitles
                list_params = compat_urllib_parse_urlencode({
                    'type': 'list',
                    'tlangs': 1,
                    'asrs': 1,
                })
                list_url = caption_url + '&' + list_params
                caption_list = self._download_xml(list_url, video_id)
                # The first track is taken as the original language track
                original_lang_node = caption_list.find('track')
                if original_lang_node is None:
                    self._downloader.report_warning('Video doesn\'t have automatic captions')
                    return {}
                original_lang = original_lang_node.attrib['lang_code']
                caption_kind = original_lang_node.attrib.get('kind', '')

                # One entry per translation target language
                sub_lang_list = {}
                for lang_node in caption_list.findall('target'):
                    sub_lang = lang_node.attrib['lang_code']
                    sub_formats = []
                    for ext in self._SUBTITLE_FORMATS:
                        params = compat_urllib_parse_urlencode({
                            'lang': original_lang,
                            'tlang': sub_lang,
                            'fmt': ext,
                            'ts': timestamp,
                            'kind': caption_kind,
                        })
                        sub_formats.append({
                            'url': caption_url + '&' + params,
                            'ext': ext,
                        })
                    sub_lang_list[sub_lang] = sub_formats
                return sub_lang_list

            def make_captions(sub_url, sub_langs):
                # Build the captions dict by overriding the 'tlang' and 'fmt'
                # query parameters of sub_url for every language and format
                parsed_sub_url = compat_urllib_parse_urlparse(sub_url)
                caption_qs = compat_parse_qs(parsed_sub_url.query)
                captions = {}
                for sub_lang in sub_langs:
                    sub_formats = []
                    for ext in self._SUBTITLE_FORMATS:
                        caption_qs.update({
                            'tlang': [sub_lang],
                            'fmt': [ext],
                        })
                        sub_url = compat_urlparse.urlunparse(parsed_sub_url._replace(
                            query=compat_urllib_parse_urlencode(caption_qs, True)))
                        sub_formats.append({
                            'url': sub_url,
                            'ext': ext,
                        })
                    captions[sub_lang] = sub_formats
                return captions

            # New captions format as of 22.06.2017
            player_response = args.get('player_response')
            if player_response and isinstance(player_response, compat_str):
                player_response = self._parse_json(
                    player_response, video_id, fatal=False)
                if player_response:
                    renderer = player_response['captions']['playerCaptionsTracklistRenderer']
                    # The first caption track serves as the base URL for all
                    # translation languages
                    base_url = renderer['captionTracks'][0]['baseUrl']
                    sub_lang_list = []
                    for lang in renderer['translationLanguages']:
                        lang_code = lang.get('languageCode')
                        if lang_code:
                            sub_lang_list.append(lang_code)
                    return make_captions(base_url, sub_lang_list)

            # Some videos don't provide ttsurl but rather caption_tracks and
            # caption_translation_languages (e.g. 20LmZk1hakA)
            # Not used anymore as of 22.06.2017
            caption_tracks = args['caption_tracks']
            caption_translation_languages = args['caption_translation_languages']
            caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
            sub_lang_list = []
            for lang in caption_translation_languages.split(','):
                lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang))
                sub_lang = lang_qs.get('lc', [None])[0]
                if sub_lang:
                    sub_lang_list.append(sub_lang)
            return make_captions(caption_url, sub_lang_list)
        # An ExtractorError can be raised by the download process if there
        # are no automatic captions but there are subtitles; KeyError and
        # IndexError cover data missing from the player config
        except (KeyError, IndexError, ExtractorError):
            self._downloader.report_warning(err_msg)
            return {}
1605
1606     def _mark_watched(self, video_id, video_info, player_response):
1607         playback_url = url_or_none(try_get(
1608             player_response,
1609             lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']) or try_get(
1610             video_info, lambda x: x['videostats_playback_base_url'][0]))
1611         if not playback_url:
1612             return
1613         parsed_playback_url = compat_urlparse.urlparse(playback_url)
1614         qs = compat_urlparse.parse_qs(parsed_playback_url.query)
1615
1616         # cpn generation algorithm is reverse engineered from base.js.
1617         # In fact it works even with dummy cpn.
1618         CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
1619         cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
1620
1621         qs.update({
1622             'ver': ['2'],
1623             'cpn': [cpn],
1624         })
1625         playback_url = compat_urlparse.urlunparse(
1626             parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
1627
1628         self._download_webpage(
1629             playback_url, video_id, 'Marking watched',
1630             'Unable to mark watched', fatal=False)
1631
1632     @staticmethod
1633     def _extract_urls(webpage):
1634         # Embedded YouTube player
1635         entries = [
1636             unescapeHTML(mobj.group('url'))
1637             for mobj in re.finditer(r'''(?x)
1638             (?:
1639                 <iframe[^>]+?src=|
1640                 data-video-url=|
1641                 <embed[^>]+?src=|
1642                 embedSWF\(?:\s*|
1643                 <object[^>]+data=|
1644                 new\s+SWFObject\(
1645             )
1646             (["\'])
1647                 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1648                 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
1649             \1''', webpage)]
1650
1651         # lazyYT YouTube embed
1652         entries.extend(list(map(
1653             unescapeHTML,
1654             re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))
1655
1656         # Wordpress "YouTube Video Importer" plugin
1657         matches = re.findall(r'''(?x)<div[^>]+
1658             class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
1659             data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
1660         entries.extend(m[-1] for m in matches)
1661
1662         return entries
1663
1664     @staticmethod
1665     def _extract_url(webpage):
1666         urls = YoutubeIE._extract_urls(webpage)
1667         return urls[0] if urls else None
1668
1669     @classmethod
1670     def extract_id(cls, url):
1671         mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
1672         if mobj is None:
1673             raise ExtractorError('Invalid URL: %s' % url)
1674         video_id = mobj.group(2)
1675         return video_id
1676
1677     def _extract_chapters_from_json(self, webpage, video_id, duration):
1678         if not webpage:
1679             return
1680         player = self._parse_json(
1681             self._search_regex(
1682                 r'RELATED_PLAYER_ARGS["\']\s*:\s*({.+})\s*,?\s*\n', webpage,
1683                 'player args', default='{}'),
1684             video_id, fatal=False)
1685         if not player or not isinstance(player, dict):
1686             return
1687         watch_next_response = player.get('watch_next_response')
1688         if not isinstance(watch_next_response, compat_str):
1689             return
1690         response = self._parse_json(watch_next_response, video_id, fatal=False)
1691         if not response or not isinstance(response, dict):
1692             return
1693         chapters_list = try_get(
1694             response,
1695             lambda x: x['playerOverlays']
1696                        ['playerOverlayRenderer']
1697                        ['decoratedPlayerBarRenderer']
1698                        ['decoratedPlayerBarRenderer']
1699                        ['playerBar']
1700                        ['chapteredPlayerBarRenderer']
1701                        ['chapters'],
1702             list)
1703         if not chapters_list:
1704             return
1705
1706         def chapter_time(chapter):
1707             return float_or_none(
1708                 try_get(
1709                     chapter,
1710                     lambda x: x['chapterRenderer']['timeRangeStartMillis'],
1711                     int),
1712                 scale=1000)
1713         chapters = []
1714         for next_num, chapter in enumerate(chapters_list, start=1):
1715             start_time = chapter_time(chapter)
1716             if start_time is None:
1717                 continue
1718             end_time = (chapter_time(chapters_list[next_num])
1719                         if next_num < len(chapters_list) else duration)
1720             if end_time is None:
1721                 continue
1722             title = try_get(
1723                 chapter, lambda x: x['chapterRenderer']['title']['simpleText'],
1724                 compat_str)
1725             chapters.append({
1726                 'start_time': start_time,
1727                 'end_time': end_time,
1728                 'title': title,
1729             })
1730         return chapters
1731
1732     @staticmethod
1733     def _extract_chapters_from_description(description, duration):
1734         if not description:
1735             return None
1736         chapter_lines = re.findall(
1737             r'(?:^|<br\s*/>)([^<]*<a[^>]+onclick=["\']yt\.www\.watch\.player\.seekTo[^>]+>(\d{1,2}:\d{1,2}(?::\d{1,2})?)</a>[^>]*)(?=$|<br\s*/>)',
1738             description)
1739         if not chapter_lines:
1740             return None
1741         chapters = []
1742         for next_num, (chapter_line, time_point) in enumerate(
1743                 chapter_lines, start=1):
1744             start_time = parse_duration(time_point)
1745             if start_time is None:
1746                 continue
1747             if start_time > duration:
1748                 break
1749             end_time = (duration if next_num == len(chapter_lines)
1750                         else parse_duration(chapter_lines[next_num][1]))
1751             if end_time is None:
1752                 continue
1753             if end_time > duration:
1754                 end_time = duration
1755             if start_time > end_time:
1756                 break
1757             chapter_title = re.sub(
1758                 r'<a[^>]+>[^<]+</a>', '', chapter_line).strip(' \t-')
1759             chapter_title = re.sub(r'\s+', ' ', chapter_title)
1760             chapters.append({
1761                 'start_time': start_time,
1762                 'end_time': end_time,
1763                 'title': chapter_title,
1764             })
1765         return chapters
1766
1767     def _extract_chapters(self, webpage, description, video_id, duration):
1768         return (self._extract_chapters_from_json(webpage, video_id, duration)
1769                 or self._extract_chapters_from_description(description, duration))
1770
1771     def _real_extract(self, url):
1772         url, smuggled_data = unsmuggle_url(url, {})
1773
1774         proto = (
1775             'http' if self._downloader.params.get('prefer_insecure', False)
1776             else 'https')
1777
1778         start_time = None
1779         end_time = None
1780         parsed_url = compat_urllib_parse_urlparse(url)
1781         for component in [parsed_url.fragment, parsed_url.query]:
1782             query = compat_parse_qs(component)
1783             if start_time is None and 't' in query:
1784                 start_time = parse_duration(query['t'][0])
1785             if start_time is None and 'start' in query:
1786                 start_time = parse_duration(query['start'][0])
1787             if end_time is None and 'end' in query:
1788                 end_time = parse_duration(query['end'][0])
1789
1790         # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1791         mobj = re.search(self._NEXT_URL_RE, url)
1792         if mobj:
1793             url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
1794         video_id = self.extract_id(url)
1795
1796         # Get video webpage
1797         url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
1798         video_webpage, urlh = self._download_webpage_handle(url, video_id)
1799
1800         qs = compat_parse_qs(compat_urllib_parse_urlparse(urlh.geturl()).query)
1801         video_id = qs.get('v', [None])[0] or video_id
1802
1803         # Attempt to extract SWF player URL
1804         mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
1805         if mobj is not None:
1806             player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1807         else:
1808             player_url = None
1809
1810         dash_mpds = []
1811
1812         def add_dash_mpd(video_info):
1813             dash_mpd = video_info.get('dashmpd')
1814             if dash_mpd and dash_mpd[0] not in dash_mpds:
1815                 dash_mpds.append(dash_mpd[0])
1816
1817         def add_dash_mpd_pr(pl_response):
1818             dash_mpd = url_or_none(try_get(
1819                 pl_response, lambda x: x['streamingData']['dashManifestUrl'],
1820                 compat_str))
1821             if dash_mpd and dash_mpd not in dash_mpds:
1822                 dash_mpds.append(dash_mpd)
1823
1824         is_live = None
1825         view_count = None
1826
1827         def extract_view_count(v_info):
1828             return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
1829
1830         def extract_player_response(player_response, video_id):
1831             pl_response = str_or_none(player_response)
1832             if not pl_response:
1833                 return
1834             pl_response = self._parse_json(pl_response, video_id, fatal=False)
1835             if isinstance(pl_response, dict):
1836                 add_dash_mpd_pr(pl_response)
1837                 return pl_response
1838
1839         player_response = {}
1840
1841         # Get video info
1842         video_info = {}
1843         embed_webpage = None
1844         if (self._og_search_property('restrictions:age', video_webpage, default=None) == '18+'
1845                 or re.search(r'player-age-gate-content">', video_webpage) is not None):
1846             age_gate = True
1847             # We simulate the access to the video from www.youtube.com/v/{video_id}
1848             # this can be viewed without login into Youtube
1849             url = proto + '://www.youtube.com/embed/%s' % video_id
1850             embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
1851             data = compat_urllib_parse_urlencode({
1852                 'video_id': video_id,
1853                 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1854                 'sts': self._search_regex(
1855                     r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
1856             })
1857             video_info_url = proto + '://www.youtube.com/get_video_info?' + data
1858             try:
1859                 video_info_webpage = self._download_webpage(
1860                     video_info_url, video_id,
1861                     note='Refetching age-gated info webpage',
1862                     errnote='unable to download video info webpage')
1863             except ExtractorError:
1864                 video_info_webpage = None
1865             if video_info_webpage:
1866                 video_info = compat_parse_qs(video_info_webpage)
1867                 pl_response = video_info.get('player_response', [None])[0]
1868                 player_response = extract_player_response(pl_response, video_id)
1869                 add_dash_mpd(video_info)
1870                 view_count = extract_view_count(video_info)
1871         else:
1872             age_gate = False
1873             # Try looking directly into the video webpage
1874             ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
1875             if ytplayer_config:
1876                 args = ytplayer_config['args']
1877                 if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
1878                     # Convert to the same format returned by compat_parse_qs
1879                     video_info = dict((k, [v]) for k, v in args.items())
1880                     add_dash_mpd(video_info)
1881                 # Rental video is not rented but preview is available (e.g.
1882                 # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
1883                 # https://github.com/ytdl-org/youtube-dl/issues/10532)
1884                 if not video_info and args.get('ypc_vid'):
1885                     return self.url_result(
1886                         args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
1887                 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1888                     is_live = True
1889                 if not player_response:
1890                     player_response = extract_player_response(args.get('player_response'), video_id)
1891             if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
1892                 add_dash_mpd_pr(player_response)
1893
1894         def extract_unavailable_message():
1895             messages = []
1896             for tag, kind in (('h1', 'message'), ('div', 'submessage')):
1897                 msg = self._html_search_regex(
1898                     r'(?s)<{tag}[^>]+id=["\']unavailable-{kind}["\'][^>]*>(.+?)</{tag}>'.format(tag=tag, kind=kind),
1899                     video_webpage, 'unavailable %s' % kind, default=None)
1900                 if msg:
1901                     messages.append(msg)
1902             if messages:
1903                 return '\n'.join(messages)
1904
1905         if not video_info and not player_response:
1906             unavailable_message = extract_unavailable_message()
1907             if not unavailable_message:
1908                 unavailable_message = 'Unable to extract video data'
1909             raise ExtractorError(
1910                 'YouTube said: %s' % unavailable_message, expected=True, video_id=video_id)
1911
1912         if not isinstance(video_info, dict):
1913             video_info = {}
1914
1915         video_details = try_get(
1916             player_response, lambda x: x['videoDetails'], dict) or {}
1917
1918         microformat = try_get(
1919             player_response, lambda x: x['microformat']['playerMicroformatRenderer'], dict) or {}
1920
1921         video_title = video_info.get('title', [None])[0] or video_details.get('title')
1922         if not video_title:
1923             self._downloader.report_warning('Unable to extract video title')
1924             video_title = '_'
1925
1926         description_original = video_description = get_element_by_id("eow-description", video_webpage)
1927         if video_description:
1928
1929             def replace_url(m):
1930                 redir_url = compat_urlparse.urljoin(url, m.group(1))
1931                 parsed_redir_url = compat_urllib_parse_urlparse(redir_url)
1932                 if re.search(r'^(?:www\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)$', parsed_redir_url.netloc) and parsed_redir_url.path == '/redirect':
1933                     qs = compat_parse_qs(parsed_redir_url.query)
1934                     q = qs.get('q')
1935                     if q and q[0]:
1936                         return q[0]
1937                 return redir_url
1938
1939             description_original = video_description = re.sub(r'''(?x)
1940                 <a\s+
1941                     (?:[a-zA-Z-]+="[^"]*"\s+)*?
1942                     (?:title|href)="([^"]+)"\s+
1943                     (?:[a-zA-Z-]+="[^"]*"\s+)*?
1944                     class="[^"]*"[^>]*>
1945                 [^<]+\.{3}\s*
1946                 </a>
1947             ''', replace_url, video_description)
1948             video_description = clean_html(video_description)
1949         else:
1950             video_description = video_details.get('shortDescription')
1951             if video_description is None:
1952                 video_description = self._html_search_meta('description', video_webpage)
1953
1954         if not smuggled_data.get('force_singlefeed', False):
1955             if not self._downloader.params.get('noplaylist'):
1956                 multifeed_metadata_list = try_get(
1957                     player_response,
1958                     lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
1959                     compat_str) or try_get(
1960                     video_info, lambda x: x['multifeed_metadata_list'][0], compat_str)
1961                 if multifeed_metadata_list:
1962                     entries = []
1963                     feed_ids = []
1964                     for feed in multifeed_metadata_list.split(','):
1965                         # Unquote should take place before split on comma (,) since textual
1966                         # fields may contain comma as well (see
1967                         # https://github.com/ytdl-org/youtube-dl/issues/8536)
1968                         feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
1969
1970                         def feed_entry(name):
1971                             return try_get(feed_data, lambda x: x[name][0], compat_str)
1972
1973                         feed_id = feed_entry('id')
1974                         if not feed_id:
1975                             continue
1976                         feed_title = feed_entry('title')
1977                         title = video_title
1978                         if feed_title:
1979                             title += ' (%s)' % feed_title
1980                         entries.append({
1981                             '_type': 'url_transparent',
1982                             'ie_key': 'Youtube',
1983                             'url': smuggle_url(
1984                                 '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
1985                                 {'force_singlefeed': True}),
1986                             'title': title,
1987                         })
1988                         feed_ids.append(feed_id)
1989                     self.to_screen(
1990                         'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
1991                         % (', '.join(feed_ids), video_id))
1992                     return self.playlist_result(entries, video_id, video_title, video_description)
1993             else:
1994                 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
1995
1996         if view_count is None:
1997             view_count = extract_view_count(video_info)
1998         if view_count is None and video_details:
1999             view_count = int_or_none(video_details.get('viewCount'))
2000         if view_count is None and microformat:
2001             view_count = int_or_none(microformat.get('viewCount'))
2002
2003         if is_live is None:
2004             is_live = bool_or_none(video_details.get('isLive'))
2005
2006         # Check for "rental" videos
2007         if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
2008             raise ExtractorError('"rental" videos not supported. See https://github.com/ytdl-org/youtube-dl/issues/359 for more information.', expected=True)
2009
2010         def _extract_filesize(media_url):
2011             return int_or_none(self._search_regex(
2012                 r'\bclen[=/](\d+)', media_url, 'filesize', default=None))
2013
2014         streaming_formats = try_get(player_response, lambda x: x['streamingData']['formats'], list) or []
2015         streaming_formats.extend(try_get(player_response, lambda x: x['streamingData']['adaptiveFormats'], list) or [])
2016
2017         if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
2018             self.report_rtmp_download()
2019             formats = [{
2020                 'format_id': '_rtmp',
2021                 'protocol': 'rtmp',
2022                 'url': video_info['conn'][0],
2023                 'player_url': player_url,
2024             }]
2025         elif not is_live and (streaming_formats or len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1):
2026             encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
2027             if 'rtmpe%3Dyes' in encoded_url_map:
2028                 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/ytdl-org/youtube-dl/issues/343 for more information.', expected=True)
2029             formats = []
2030             formats_spec = {}
2031             fmt_list = video_info.get('fmt_list', [''])[0]
2032             if fmt_list:
2033                 for fmt in fmt_list.split(','):
2034                     spec = fmt.split('/')
2035                     if len(spec) > 1:
2036                         width_height = spec[1].split('x')
2037                         if len(width_height) == 2:
2038                             formats_spec[spec[0]] = {
2039                                 'resolution': spec[1],
2040                                 'width': int_or_none(width_height[0]),
2041                                 'height': int_or_none(width_height[1]),
2042                             }
2043             for fmt in streaming_formats:
2044                 itag = str_or_none(fmt.get('itag'))
2045                 if not itag:
2046                     continue
2047                 quality = fmt.get('quality')
2048                 quality_label = fmt.get('qualityLabel') or quality
2049                 formats_spec[itag] = {
2050                     'asr': int_or_none(fmt.get('audioSampleRate')),
2051                     'filesize': int_or_none(fmt.get('contentLength')),
2052                     'format_note': quality_label,
2053                     'fps': int_or_none(fmt.get('fps')),
2054                     'height': int_or_none(fmt.get('height')),
2055                     # bitrate for itag 43 is always 2147483647
2056                     'tbr': float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000) if itag != '43' else None,
2057                     'width': int_or_none(fmt.get('width')),
2058                 }
2059
2060             for fmt in streaming_formats:
2061                 if fmt.get('drmFamilies') or fmt.get('drm_families'):
2062                     continue
2063                 url = url_or_none(fmt.get('url'))
2064
2065                 if not url:
2066                     cipher = fmt.get('cipher') or fmt.get('signatureCipher')
2067                     if not cipher:
2068                         continue
2069                     url_data = compat_parse_qs(cipher)
2070                     url = url_or_none(try_get(url_data, lambda x: x['url'][0], compat_str))
2071                     if not url:
2072                         continue
2073                 else:
2074                     cipher = None
2075                     url_data = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
2076
2077                 stream_type = int_or_none(try_get(url_data, lambda x: x['stream_type'][0]))
2078                 # Unsupported FORMAT_STREAM_TYPE_OTF
2079                 if stream_type == 3:
2080                     continue
2081
2082                 format_id = fmt.get('itag') or url_data['itag'][0]
2083                 if not format_id:
2084                     continue
2085                 format_id = compat_str(format_id)
2086
2087                 if cipher:
2088                     if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
2089                         ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
2090                         jsplayer_url_json = self._search_regex(
2091                             ASSETS_RE,
2092                             embed_webpage if age_gate else video_webpage,
2093                             'JS player URL (1)', default=None)
2094                         if not jsplayer_url_json and not age_gate:
2095                             # We need the embed website after all
2096                             if embed_webpage is None:
2097                                 embed_url = proto + '://www.youtube.com/embed/%s' % video_id
2098                                 embed_webpage = self._download_webpage(
2099                                     embed_url, video_id, 'Downloading embed webpage')
2100                             jsplayer_url_json = self._search_regex(
2101                                 ASSETS_RE, embed_webpage, 'JS player URL (2)', default=None)
2102
2103                             if not jsplayer_url_json:
2104                                 jsplayer_url_json = self._search_regex(
2105                                     r'"WEB_PLAYER_CONTEXT_CONFIG_ID_EMBEDDED_PLAYER":.+?"jsUrl":\s*("[^"]+")',
2106                                     embed_webpage,
2107                                     'JS player URL')
2108
2109                         player_url = json.loads(jsplayer_url_json)
2110                         if player_url is None:
2111                             player_url_json = self._search_regex(
2112                                 r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
2113                                 video_webpage, 'age gate player URL')
2114                             player_url = json.loads(player_url_json)
2115
2116                     if 'sig' in url_data:
2117                         url += '&signature=' + url_data['sig'][0]
2118                     elif 's' in url_data:
2119                         encrypted_sig = url_data['s'][0]
2120
2121                         if self._downloader.params.get('verbose'):
2122                             if player_url is None:
2123                                 player_desc = 'unknown'
2124                             else:
2125                                 player_type, player_version = self._extract_player_info(player_url)
2126                                 player_desc = '%s player %s' % ('flash' if player_type == 'swf' else 'html5', player_version)
2127                             parts_sizes = self._signature_cache_id(encrypted_sig)
2128                             self.to_screen('{%s} signature length %s, %s' %
2129                                            (format_id, parts_sizes, player_desc))
2130
2131                         signature = self._decrypt_signature(
2132                             encrypted_sig, video_id, player_url, age_gate)
2133                         sp = try_get(url_data, lambda x: x['sp'][0], compat_str) or 'signature'
2134                         url += '&%s=%s' % (sp, signature)
2135                 if 'ratebypass' not in url:
2136                     url += '&ratebypass=yes'
2137
2138                 dct = {
2139                     'format_id': format_id,
2140                     'url': url,
2141                     'player_url': player_url,
2142                 }
2143                 if format_id in self._formats:
2144                     dct.update(self._formats[format_id])
2145                 if format_id in formats_spec:
2146                     dct.update(formats_spec[format_id])
2147
2148                 # Some itags are not included in DASH manifest thus corresponding formats will
2149                 # lack metadata (see https://github.com/ytdl-org/youtube-dl/pull/5993).
2150                 # Trying to extract metadata from url_encoded_fmt_stream_map entry.
2151                 mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
2152                 width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
2153
2154                 if width is None:
2155                     width = int_or_none(fmt.get('width'))
2156                 if height is None:
2157                     height = int_or_none(fmt.get('height'))
2158
2159                 filesize = int_or_none(url_data.get(
2160                     'clen', [None])[0]) or _extract_filesize(url)
2161
2162                 quality = url_data.get('quality', [None])[0] or fmt.get('quality')
2163                 quality_label = url_data.get('quality_label', [None])[0] or fmt.get('qualityLabel')
2164
2165                 tbr = (float_or_none(url_data.get('bitrate', [None])[0], 1000)
2166                        or float_or_none(fmt.get('bitrate'), 1000)) if format_id != '43' else None
2167                 fps = int_or_none(url_data.get('fps', [None])[0]) or int_or_none(fmt.get('fps'))
2168
2169                 more_fields = {
2170                     'filesize': filesize,
2171                     'tbr': tbr,
2172                     'width': width,
2173                     'height': height,
2174                     'fps': fps,
2175                     'format_note': quality_label or quality,
2176                 }
2177                 for key, value in more_fields.items():
2178                     if value:
2179                         dct[key] = value
2180                 type_ = url_data.get('type', [None])[0] or fmt.get('mimeType')
2181                 if type_:
2182                     type_split = type_.split(';')
2183                     kind_ext = type_split[0].split('/')
2184                     if len(kind_ext) == 2:
2185                         kind, _ = kind_ext
2186                         dct['ext'] = mimetype2ext(type_split[0])
2187                         if kind in ('audio', 'video'):
2188                             codecs = None
2189                             for mobj in re.finditer(
2190                                     r'(?P<key>[a-zA-Z_-]+)=(?P<quote>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):
2191                                 if mobj.group('key') == 'codecs':
2192                                     codecs = mobj.group('val')
2193                                     break
2194                             if codecs:
2195                                 dct.update(parse_codecs(codecs))
2196                 if dct.get('acodec') == 'none' or dct.get('vcodec') == 'none':
2197                     dct['downloader_options'] = {
2198                         # Youtube throttles chunks >~10M
2199                         'http_chunk_size': 10485760,
2200                     }
2201                 formats.append(dct)
2202         else:
2203             manifest_url = (
2204                 url_or_none(try_get(
2205                     player_response,
2206                     lambda x: x['streamingData']['hlsManifestUrl'],
2207                     compat_str))
2208                 or url_or_none(try_get(
2209                     video_info, lambda x: x['hlsvp'][0], compat_str)))
2210             if manifest_url:
2211                 formats = []
2212                 m3u8_formats = self._extract_m3u8_formats(
2213                     manifest_url, video_id, 'mp4', fatal=False)
2214                 for a_format in m3u8_formats:
2215                     itag = self._search_regex(
2216                         r'/itag/(\d+)/', a_format['url'], 'itag', default=None)
2217                     if itag:
2218                         a_format['format_id'] = itag
2219                         if itag in self._formats:
2220                             dct = self._formats[itag].copy()
2221                             dct.update(a_format)
2222                             a_format = dct
2223                     a_format['player_url'] = player_url
2224                     # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
2225                     a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
2226                     formats.append(a_format)
2227             else:
2228                 error_message = extract_unavailable_message()
2229                 if not error_message:
2230                     error_message = clean_html(try_get(
2231                         player_response, lambda x: x['playabilityStatus']['reason'],
2232                         compat_str))
2233                 if not error_message:
2234                     error_message = clean_html(
2235                         try_get(video_info, lambda x: x['reason'][0], compat_str))
2236                 if error_message:
2237                     raise ExtractorError(error_message, expected=True)
2238                 raise ExtractorError('no conn, hlsvp, hlsManifestUrl or url_encoded_fmt_stream_map information found in video info')
2239
2240         # uploader
2241         video_uploader = try_get(
2242             video_info, lambda x: x['author'][0],
2243             compat_str) or str_or_none(video_details.get('author'))
2244         if video_uploader:
2245             video_uploader = compat_urllib_parse_unquote_plus(video_uploader)
2246         else:
2247             self._downloader.report_warning('unable to extract uploader name')
2248
2249         # uploader_id
2250         video_uploader_id = None
2251         video_uploader_url = None
2252         mobj = re.search(
2253             r'<link itemprop="url" href="(?P<uploader_url>https?://www\.youtube\.com/(?:user|channel)/(?P<uploader_id>[^"]+))">',
2254             video_webpage)
2255         if mobj is not None:
2256             video_uploader_id = mobj.group('uploader_id')
2257             video_uploader_url = mobj.group('uploader_url')
2258         else:
2259             owner_profile_url = url_or_none(microformat.get('ownerProfileUrl'))
2260             if owner_profile_url:
2261                 video_uploader_id = self._search_regex(
2262                     r'(?:user|channel)/([^/]+)', owner_profile_url, 'uploader id',
2263                     default=None)
2264                 video_uploader_url = owner_profile_url
2265
2266         channel_id = (
2267             str_or_none(video_details.get('channelId'))
2268             or self._html_search_meta(
2269                 'channelId', video_webpage, 'channel id', default=None)
2270             or self._search_regex(
2271                 r'data-channel-external-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
2272                 video_webpage, 'channel id', default=None, group='id'))
2273         channel_url = 'http://www.youtube.com/channel/%s' % channel_id if channel_id else None
2274
2275         thumbnails = []
2276         thumbnails_list = try_get(
2277             video_details, lambda x: x['thumbnail']['thumbnails'], list) or []
2278         for t in thumbnails_list:
2279             if not isinstance(t, dict):
2280                 continue
2281             thumbnail_url = url_or_none(t.get('url'))
2282             if not thumbnail_url:
2283                 continue
2284             thumbnails.append({
2285                 'url': thumbnail_url,
2286                 'width': int_or_none(t.get('width')),
2287                 'height': int_or_none(t.get('height')),
2288             })
2289
2290         if not thumbnails:
2291             video_thumbnail = None
2292             # We try first to get a high quality image:
2293             m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
2294                                 video_webpage, re.DOTALL)
2295             if m_thumb is not None:
2296                 video_thumbnail = m_thumb.group(1)
2297             thumbnail_url = try_get(video_info, lambda x: x['thumbnail_url'][0], compat_str)
2298             if thumbnail_url:
2299                 video_thumbnail = compat_urllib_parse_unquote_plus(thumbnail_url)
2300             if video_thumbnail:
2301                 thumbnails.append({'url': video_thumbnail})
2302
2303         # upload date
2304         upload_date = self._html_search_meta(
2305             'datePublished', video_webpage, 'upload date', default=None)
2306         if not upload_date:
2307             upload_date = self._search_regex(
2308                 [r'(?s)id="eow-date.*?>(.*?)</span>',
2309                  r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'],
2310                 video_webpage, 'upload date', default=None)
2311         if not upload_date:
2312             upload_date = microformat.get('publishDate') or microformat.get('uploadDate')
2313         upload_date = unified_strdate(upload_date)
2314
2315         video_license = self._html_search_regex(
2316             r'<h4[^>]+class="title"[^>]*>\s*License\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li',
2317             video_webpage, 'license', default=None)
2318
2319         m_music = re.search(
2320             r'''(?x)
2321                 <h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*
2322                 <ul[^>]*>\s*
2323                 <li>(?P<title>.+?)
2324                 by (?P<creator>.+?)
2325                 (?:
2326                     \(.+?\)|
2327                     <a[^>]*
2328                         (?:
2329                             \bhref=["\']/red[^>]*>|             # drop possible
2330                             >\s*Listen ad-free with YouTube Red # YouTube Red ad
2331                         )
2332                     .*?
2333                 )?</li
2334             ''',
2335             video_webpage)
2336         if m_music:
2337             video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
2338             video_creator = clean_html(m_music.group('creator'))
2339         else:
2340             video_alt_title = video_creator = None
2341
2342         def extract_meta(field):
2343             return self._html_search_regex(
2344                 r'<h4[^>]+class="title"[^>]*>\s*%s\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li>\s*' % field,
2345                 video_webpage, field, default=None)
2346
2347         track = extract_meta('Song')
2348         artist = extract_meta('Artist')
2349         album = extract_meta('Album')
2350
2351         # Youtube Music Auto-generated description
2352         release_date = release_year = None
2353         if video_description:
2354             mobj = re.search(r'(?s)Provided to YouTube by [^\n]+\n+(?P<track>[^·]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?', video_description)
2355             if mobj:
2356                 if not track:
2357                     track = mobj.group('track').strip()
2358                 if not artist:
2359                     artist = mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·'))
2360                 if not album:
2361                     album = mobj.group('album'.strip())
2362                 release_year = mobj.group('release_year')
2363                 release_date = mobj.group('release_date')
2364                 if release_date:
2365                     release_date = release_date.replace('-', '')
2366                     if not release_year:
2367                         release_year = int(release_date[:4])
2368                 if release_year:
2369                     release_year = int(release_year)
2370
2371         m_episode = re.search(
2372             r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
2373             video_webpage)
2374         if m_episode:
2375             series = unescapeHTML(m_episode.group('series'))
2376             season_number = int(m_episode.group('season'))
2377             episode_number = int(m_episode.group('episode'))
2378         else:
2379             series = season_number = episode_number = None
2380
2381         m_cat_container = self._search_regex(
2382             r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
2383             video_webpage, 'categories', default=None)
2384         category = None
2385         if m_cat_container:
2386             category = self._html_search_regex(
2387                 r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
2388                 default=None)
2389         if not category:
2390             category = try_get(
2391                 microformat, lambda x: x['category'], compat_str)
2392         video_categories = None if category is None else [category]
2393
2394         video_tags = [
2395             unescapeHTML(m.group('content'))
2396             for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
2397         if not video_tags:
2398             video_tags = try_get(video_details, lambda x: x['keywords'], list)
2399
2400         def _extract_count(count_name):
2401             return str_to_int(self._search_regex(
2402                 r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>'
2403                 % re.escape(count_name),
2404                 video_webpage, count_name, default=None))
2405
2406         like_count = _extract_count('like')
2407         dislike_count = _extract_count('dislike')
2408
2409         if view_count is None:
2410             view_count = str_to_int(self._search_regex(
2411                 r'<[^>]+class=["\']watch-view-count[^>]+>\s*([\d,\s]+)', video_webpage,
2412                 'view count', default=None))
2413
2414         average_rating = (
2415             float_or_none(video_details.get('averageRating'))
2416             or try_get(video_info, lambda x: float_or_none(x['avg_rating'][0])))
2417
2418         # subtitles
2419         video_subtitles = self.extract_subtitles(video_id, video_webpage)
2420         automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
2421
2422         video_duration = try_get(
2423             video_info, lambda x: int_or_none(x['length_seconds'][0]))
2424         if not video_duration:
2425             video_duration = int_or_none(video_details.get('lengthSeconds'))
2426         if not video_duration:
2427             video_duration = parse_duration(self._html_search_meta(
2428                 'duration', video_webpage, 'video duration'))
2429
2430         # annotations
2431         video_annotations = None
2432         if self._downloader.params.get('writeannotations', False):
2433             xsrf_token = self._search_regex(
2434                 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>[A-Za-z0-9+/=]+)\2',
2435                 video_webpage, 'xsrf token', group='xsrf_token', fatal=False)
2436             invideo_url = try_get(
2437                 player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
2438             if xsrf_token and invideo_url:
2439                 xsrf_field_name = self._search_regex(
2440                     r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
2441                     video_webpage, 'xsrf field name',
2442                     group='xsrf_field_name', default='session_token')
2443                 video_annotations = self._download_webpage(
2444                     self._proto_relative_url(invideo_url),
2445                     video_id, note='Downloading annotations',
2446                     errnote='Unable to download video annotations', fatal=False,
2447                     data=urlencode_postdata({xsrf_field_name: xsrf_token}))
2448
2449         chapters = self._extract_chapters(video_webpage, description_original, video_id, video_duration)
2450
2451         # Look for the DASH manifest
2452         if self._downloader.params.get('youtube_include_dash_manifest', True):
2453             dash_mpd_fatal = True
2454             for mpd_url in dash_mpds:
2455                 dash_formats = {}
2456                 try:
2457                     def decrypt_sig(mobj):
2458                         s = mobj.group(1)
2459                         dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
2460                         return '/signature/%s' % dec_s
2461
2462                     mpd_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, mpd_url)
2463
2464                     for df in self._extract_mpd_formats(
2465                             mpd_url, video_id, fatal=dash_mpd_fatal,
2466                             formats_dict=self._formats):
2467                         if not df.get('filesize'):
2468                             df['filesize'] = _extract_filesize(df['url'])
2469                         # Do not overwrite DASH format found in some previous DASH manifest
2470                         if df['format_id'] not in dash_formats:
2471                             dash_formats[df['format_id']] = df
2472                         # Additional DASH manifests may end up in HTTP Error 403 therefore
2473                         # allow them to fail without bug report message if we already have
2474                         # some DASH manifest succeeded. This is temporary workaround to reduce
2475                         # burst of bug reports until we figure out the reason and whether it
2476                         # can be fixed at all.
2477                         dash_mpd_fatal = False
2478                 except (ExtractorError, KeyError) as e:
2479                     self.report_warning(
2480                         'Skipping DASH manifest: %r' % e, video_id)
2481                 if dash_formats:
2482                     # Remove the formats we found through non-DASH, they
2483                     # contain less info and it can be wrong, because we use
2484                     # fixed values (for example the resolution). See
2485                     # https://github.com/ytdl-org/youtube-dl/issues/5774 for an
2486                     # example.
2487                     formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
2488                     formats.extend(dash_formats.values())
2489
2490         # Check for malformed aspect ratio
2491         stretched_m = re.search(
2492             r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
2493             video_webpage)
2494         if stretched_m:
2495             w = float(stretched_m.group('w'))
2496             h = float(stretched_m.group('h'))
2497             # yt:stretch may hold invalid ratio data (e.g. for Q39EVAstoRM ratio is 17:0).
2498             # We will only process correct ratios.
2499             if w > 0 and h > 0:
2500                 ratio = w / h
2501                 for f in formats:
2502                     if f.get('vcodec') != 'none':
2503                         f['stretched_ratio'] = ratio
2504
2505         if not formats:
2506             if 'reason' in video_info:
2507                 if 'The uploader has not made this video available in your country.' in video_info['reason']:
2508                     regions_allowed = self._html_search_meta(
2509                         'regionsAllowed', video_webpage, default=None)
2510                     countries = regions_allowed.split(',') if regions_allowed else None
2511                     self.raise_geo_restricted(
2512                         msg=video_info['reason'][0], countries=countries)
2513                 reason = video_info['reason'][0]
2514                 if 'Invalid parameters' in reason:
2515                     unavailable_message = extract_unavailable_message()
2516                     if unavailable_message:
2517                         reason = unavailable_message
2518                 raise ExtractorError(
2519                     'YouTube said: %s' % reason,
2520                     expected=True, video_id=video_id)
2521             if video_info.get('license_info') or try_get(player_response, lambda x: x['streamingData']['licenseInfos']):
2522                 raise ExtractorError('This video is DRM protected.', expected=True)
2523
2524         self._sort_formats(formats)
2525
2526         self.mark_watched(video_id, video_info, player_response)
2527
2528         return {
2529             'id': video_id,
2530             'uploader': video_uploader,
2531             'uploader_id': video_uploader_id,
2532             'uploader_url': video_uploader_url,
2533             'channel_id': channel_id,
2534             'channel_url': channel_url,
2535             'upload_date': upload_date,
2536             'license': video_license,
2537             'creator': video_creator or artist,
2538             'title': video_title,
2539             'alt_title': video_alt_title or track,
2540             'thumbnails': thumbnails,
2541             'description': video_description,
2542             'categories': video_categories,
2543             'tags': video_tags,
2544             'subtitles': video_subtitles,
2545             'automatic_captions': automatic_captions,
2546             'duration': video_duration,
2547             'age_limit': 18 if age_gate else 0,
2548             'annotations': video_annotations,
2549             'chapters': chapters,
2550             'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
2551             'view_count': view_count,
2552             'like_count': like_count,
2553             'dislike_count': dislike_count,
2554             'average_rating': average_rating,
2555             'formats': formats,
2556             'is_live': is_live,
2557             'start_time': start_time,
2558             'end_time': end_time,
2559             'series': series,
2560             'season_number': season_number,
2561             'episode_number': episode_number,
2562             'track': track,
2563             'artist': artist,
2564             'album': album,
2565             'release_date': release_date,
2566             'release_year': release_year,
2567         }
2568
2569
class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
    """Extractor for YouTube playlists and mixes.

    Accepts playlist URLs on youtube.com, youtubekids.com, invidio.us and
    youtu.be as well as bare playlist ids (see _VALID_URL).
    """
    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r"""(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /
                            (?:
                               (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/(?:videoseries|[0-9A-Za-z_-]{11}))
                               \? (?:.*?[&;])*? (?:p|a|list)=
                            |  p/
                            )|
                            youtu\.be/[0-9A-Za-z_-]{11}\?.*?\blist=
                        )
                        (
                            (?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)?[0-9A-Za-z-_]{10,}
                            # Top tracks, they can also include dots
                            |(?:MC)[\w\.]*
                        )
                        .*
                     |
                        (%(playlist_id)s)
                     )""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
    _VIDEO_RE_TPL = r'href="\s*/watch\?v=%s(?:&amp;(?:[^"]*?index=(?P<index>\d+))?(?:[^>]+>(?P<title>[^<]+))?)?'
    _VIDEO_RE = _VIDEO_RE_TPL % r'(?P<id>[0-9A-Za-z_-]{11})'
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
        'info_dict': {
            'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
            'uploader': 'Sergey M.',
            'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
            'title': 'youtube-dl public playlist',
        },
        'playlist_count': 1,
    }, {
        'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
        'info_dict': {
            'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
            'uploader': 'Sergey M.',
            'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
            'title': 'youtube-dl empty playlist',
        },
        'playlist_count': 0,
    }, {
        'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
        'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
        'info_dict': {
            'title': '29C3: Not my department',
            'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
            'uploader': 'Christiaan008',
            'uploader_id': 'ChRiStIaAn008',
        },
        'playlist_count': 96,
    }, {
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'Wickydoo',
        },
        'playlist_mincount': 26,
    }, {
        'note': 'Large playlist',
        'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
        'info_dict': {
            'title': 'Uploads from Cauchemar',
            'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
            'uploader': 'Cauchemar',
            'uploader_id': 'Cauchemar89',
        },
        'playlist_mincount': 799,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 485,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'sdragonfang',
        }
    }, {
        'note': 'Embedded SWF player',
        'url': 'https://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA7',
            'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ',
        },
        'skip': 'This playlist does not exist',
    }, {
        'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
        'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
        'info_dict': {
            'title': 'Uploads from Interstellar Movie',
            'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
            'uploader': 'Interstellar Movie',
            'uploader_id': 'InterstellarMovie1',
        },
        'playlist_mincount': 21,
    }, {
        # Playlist URL that does not actually serve a playlist
        'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
        'info_dict': {
            'id': 'FqZTN594JQw',
            'ext': 'webm',
            'title': "Smiley's People 01 detective, Adventure Series, Action",
            'uploader': 'STREEM',
            'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
            'upload_date': '20150526',
            'license': 'Standard YouTube License',
            'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
            'categories': ['People & Blogs'],
            'tags': list,
            'view_count': int,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'skip_download': True,
        },
        'skip': 'This video is not available.',
        'add_ie': [YoutubeIE.ie_key()],
    }, {
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        # https://github.com/ytdl-org/youtube-dl/issues/21844
        'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
        'info_dict': {
            'title': 'Data Analysis with Dr Mike Pound',
            'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
            'uploader_id': 'Computerphile',
            'uploader': 'Computerphile',
        },
        'playlist_mincount': 11,
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }, {
        'url': 'https://invidio.us/playlist?list=PLDIoUOhQQPlXr63I_vwF9GD8sAKh77dWU',
        'only_matching': True,
    }, {
        'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
        'only_matching': True,
    }]

    def _real_initialize(self):
        """Perform the login step before any extraction."""
        self._login()

    def extract_videos_from_page(self, page):
        """Pair up the video ids and titles found in *page*.

        Tags carrying a ``data-video-id`` attribute are scanned first; the
        regex fallbacks are then handed, together with the two accumulator
        lists, to ``extract_videos_from_page_impl`` (defined outside this
        class). Returns ``zip(ids, titles)``; a title may be None.
        """
        found_ids = []
        found_titles = []

        tag_re = r'(<[^>]*\bdata-video-id\s*=\s*["\'][0-9A-Za-z_-]{11}[^>]+>)'
        for tag in re.findall(tag_re, page):
            tag_attrs = extract_attributes(tag)
            vid = tag_attrs['data-video-id']
            title = unescapeHTML(tag_attrs.get('data-title'))
            # Only non-empty titles are stripped; None/'' is stored as is
            found_ids.append(vid)
            found_titles.append(title.strip() if title else title)

        fallback_patterns = (
            # Fallback with old _VIDEO_RE
            self._VIDEO_RE,
            # Relaxed fallbacks
            r'href="\s*/watch\?v\s*=\s*(?P<id>[0-9A-Za-z_-]{11})',
            r'data-video-ids\s*=\s*["\'](?P<id>[0-9A-Za-z_-]{11})',
        )
        for pattern in fallback_patterns:
            self.extract_videos_from_page_impl(
                pattern, page, found_ids, found_titles)

        return zip(found_ids, found_titles)

    def _extract_mix(self, playlist_id):
        """Extract a mix playlist by repeatedly loading its watch page.

        The mixes are generated from a single video: the id of the playlist
        is just 'RD' + video_id, so the trailing 11 characters seed the first
        request. Every following request is seeded with the last id gathered
        so far, until a page contributes no unseen ids.
        """
        collected = []
        seed_id = playlist_id[-11:]
        for page_num in itertools.count(1):
            watch_url = 'https://www.youtube.com/watch?v=%s&list=%s' % (seed_id, playlist_id)
            webpage = self._download_webpage(
                watch_url, playlist_id, 'Downloading page {0} of Youtube mix'.format(page_num))
            page_ids = orderedSet(re.findall(
                r'''(?xs)data-video-username=".*?".*?
                           href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id),
                webpage))
            # Fetch new pages until all the videos are repeated, it seems that
            # there are always 51 unique videos.
            unseen = [vid for vid in page_ids if vid not in collected]
            if not unseen:
                break
            collected.extend(unseen)
            seed_id = collected[-1]

        url_results = self._ids_to_results(collected)

        # The title is taken from the last downloaded page; the first class
        # name yielding a truthy element wins.
        title_span = None
        for class_name in ('playlist-title', 'title long-title', 'title'):
            title_span = get_element_by_attribute('class', class_name, webpage)
            if title_span:
                break

        return self.playlist_result(url_results, playlist_id, clean_html(title_span))

    def _extract_playlist(self, playlist_id):
        """Download the playlist page and build the playlist result.

        Returns a ``(has_videos, playlist)`` tuple. ``has_videos`` is False
        only when the page has no playlist title and ``_entries`` yields
        nothing. Raises ExtractorError (expected) when an alert states that
        the playlist does not exist, is private, or the parameters are
        invalid.
        """
        url = self._TEMPLATE_URL % playlist_id
        page = self._download_webpage(url, playlist_id)

        # the yt-alert-message now has tabindex attribute (see https://github.com/ytdl-org/youtube-dl/issues/11604)
        for alert in re.findall(r'<div class="yt-alert-message"[^>]*>([^<]+)</div>', page):
            alert = alert.strip()
            # Check if the playlist exists or is private
            unavailable = re.match(
                r'[^<]*(?:The|This) playlist (?P<reason>does not exist|is private)[^<]*', alert)
            if unavailable:
                reason = unavailable.group('reason')
                pieces = ['This playlist %s' % reason]
                if 'private' in reason:
                    pieces.append(', use --username or --netrc to access it')
                pieces.append('.')
                raise ExtractorError(''.join(pieces), expected=True)
            if re.match(r'[^<]*Invalid parameters[^<]*', alert):
                raise ExtractorError(
                    'Invalid parameters. Maybe URL is incorrect.',
                    expected=True)
            if re.match(r'[^<]*Choose your language[^<]*', alert):
                continue
            self.report_warning('Youtube gives an alert message: ' + alert)

        playlist_title = self._html_search_regex(
            r'(?s)<h1 class="pl-header-title[^"]*"[^>]*>\s*(.*?)\s*</h1>',
            page, 'title', default=None)

        _UPLOADER_BASE = r'class=["\']pl-header-details[^>]+>\s*<li>\s*<a[^>]+\bhref='
        uploader = self._html_search_regex(
            r'%s["\']/(?:user|channel)/[^>]+>([^<]+)' % _UPLOADER_BASE,
            page, 'uploader', default=None)
        uploader_match = re.search(
            r'%s(["\'])(?P<path>/(?:user|channel)/(?P<uploader_id>.+?))\1' % _UPLOADER_BASE,
            page)
        uploader_id = uploader_url = None
        if uploader_match:
            uploader_id = uploader_match.group('uploader_id')
            uploader_url = compat_urlparse.urljoin(url, uploader_match.group('path'))

        if playlist_title:
            has_videos = True
        else:
            # Some playlist URLs don't actually serve a playlist (e.g.
            # https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4)
            try:
                next(self._entries(page, playlist_id))
            except StopIteration:
                has_videos = False
            else:
                has_videos = True

        playlist = self.playlist_result(
            self._entries(page, playlist_id), playlist_id, playlist_title)
        playlist['uploader'] = uploader
        playlist['uploader_id'] = uploader_id
        playlist['uploader_url'] = uploader_url

        return has_videos, playlist

    def _check_download_just_video(self, url, playlist_id):
        """Tell whether *url* also addresses a single video.

        Returns ``(video_id, result)``: ``(None, None)`` when no video id is
        present, ``(video_id, url_result)`` when the 'noplaylist' param is
        set, and ``(video_id, None)`` otherwise.
        """
        # Check if it's a video-specific URL
        query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
        video_id = query.get('v', [None])[0] or self._search_regex(
            r'(?:(?:^|//)youtu\.be/|youtube\.com/embed/(?!videoseries))([0-9A-Za-z_-]{11})', url,
            'video id', default=None)
        if not video_id:
            return None, None

        if self._downloader.params.get('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
            return video_id, self.url_result(video_id, 'Youtube', video_id=video_id)

        self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
        return video_id, None

    def _real_extract(self, url):
        """Resolve *url* to a single-video result, a mix or a regular playlist."""
        # Extract playlist id
        url_match = re.match(self._VALID_URL, url)
        if url_match is None:
            raise ExtractorError('Invalid URL: %s' % url)
        playlist_id = url_match.group(1) or url_match.group(2)

        video_id, video = self._check_download_just_video(url, playlist_id)
        if video:
            return video

        if playlist_id.startswith(('RD', 'UL', 'PU')):
            # Mixes require a custom extraction process
            return self._extract_mix(playlist_id)

        has_videos, playlist = self._extract_playlist(playlist_id)
        if video_id and not has_videos:
            # Some playlist URLs don't actually serve a playlist (see
            # https://github.com/ytdl-org/youtube-dl/issues/10537).
            # Fallback to plain video extraction if there is a video id
            # along with playlist id.
            return self.url_result(video_id, 'Youtube', video_id=video_id)

        return playlist
2931
2932
class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
    """Extractor for /channel/<id> URLs.

    Prefers redirecting to the channel's uploads playlist and falls back to
    scraping the channel's videos page.
    """
    IE_DESC = 'YouTube.com channels'
    _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie|kids)?\.com|(?:www\.)?invidio\.us)/channel/(?P<id>[0-9A-Za-z_-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos'
    _VIDEO_RE = r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?'
    IE_NAME = 'youtube:channel'
    _TESTS = [{
        'note': 'paginated channel',
        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
        'playlist_mincount': 91,
        'info_dict': {
            'id': 'UUKfVa3S1e4PHvxWcwyMMg8w',
            'title': 'Uploads from lex will',
            'uploader': 'lex will',
            'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
        }
    }, {
        'note': 'Age restricted channel',
        # from https://www.youtube.com/user/DeusExOfficial
        'url': 'https://www.youtube.com/channel/UCs0ifCMCm1icqRbqhUINa0w',
        'playlist_mincount': 64,
        'info_dict': {
            'id': 'UUs0ifCMCm1icqRbqhUINa0w',
            'title': 'Uploads from Deus Ex',
            'uploader': 'Deus Ex',
            'uploader_id': 'DeusExOfficial',
        },
    }, {
        'url': 'https://invidio.us/channel/UC23qupoDRn9YOAVzeoxjOQA',
        'only_matching': True,
    }, {
        'url': 'https://www.youtubekids.com/channel/UCyu8StPfZWapR6rfW_JgqcA',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Decline URLs claimed by the playlists or live extractors."""
        if YoutubePlaylistsIE.suitable(url) or YoutubeLiveIE.suitable(url):
            return False
        return super(YoutubeChannelIE, cls).suitable(url)

    def _build_template_url(self, url, channel_id):
        """Return the videos-page URL for *channel_id* (*url* is unused here)."""
        return self._TEMPLATE_URL % channel_id

    def _real_extract(self, url):
        """Extract a channel, preferably via its uploads ('UU...') playlist."""
        channel_id = self._match_id(url)

        url = self._build_template_url(url, channel_id)

        # Channel by page listing is restricted to 35 pages of 30 items, i.e. 1050 videos total (see #5778)
        # Workaround by extracting as a playlist if managed to obtain channel playlist URL
        # otherwise fallback on channel by page extraction
        channel_page = self._download_webpage(
            url + '?view=57', channel_id,
            'Downloading channel page', fatal=False)
        channel_playlist_id = False
        if channel_page is not False:
            channel_playlist_id = self._html_search_meta(
                'channelId', channel_page, 'channel id', default=None)
            if not channel_playlist_id:
                # No channelId meta tag: try the app deep-link meta tags
                channel_url = self._html_search_meta(
                    ('al:ios:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad'),
                    channel_page, 'channel url', default=None)
                if channel_url:
                    channel_playlist_id = self._search_regex(
                        r'vnd\.youtube://user/([0-9A-Za-z_-]+)',
                        channel_url, 'channel id', default=None)
        if channel_playlist_id and channel_playlist_id.startswith('UC'):
            # Swap the 'UC' prefix for 'UU' to address the uploads playlist
            playlist_id = 'UU' + channel_playlist_id[2:]
            playlist_url = compat_urlparse.urljoin(url, '/playlist?list=%s' % playlist_id)
            return self.url_result(playlist_url, 'YoutubePlaylist')

        channel_page = self._download_webpage(url, channel_id, 'Downloading page #1')
        autogenerated_match = re.search(r'''(?x)
                class="[^"]*?(?:
                    channel-header-autogenerated-label|
                    yt-channel-title-autogenerated
                )[^"]*"''', channel_page)

        if autogenerated_match is not None:
            # The videos are contained in a single page
            # the ajax pages can't be used, they are empty
            entries = []
            for video_id, video_title in self.extract_videos_from_page(channel_page):
                entries.append(self.url_result(
                    video_id, 'Youtube', video_id=video_id,
                    video_title=video_title))
            return self.playlist_result(entries, channel_id)

        # Probe for at least one entry; an empty channel page may carry an
        # alert explaining why, which is surfaced as an expected error.
        try:
            next(self._entries(channel_page, channel_id))
        except StopIteration:
            alert_message = self._html_search_regex(
                r'(?s)<div[^>]+class=(["\']).*?\byt-alert-message\b.*?\1[^>]*>(?P<alert>[^<]+)</div>',
                channel_page, 'alert', default=None, group='alert')
            if alert_message:
                raise ExtractorError('Youtube said: %s' % alert_message, expected=True)

        return self.playlist_result(self._entries(channel_page, channel_id), channel_id)
3032
3033
class YoutubeUserIE(YoutubeChannelIE):
    """Extractor for user pages (/user/<name>, /c/<name>, /<name>, ytuser:<name>)."""
    IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:(?P<user>user|c)/)?(?!(?:attribution_link|watch|results|shared)(?:$|[^a-z_A-Z0-9%-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_%-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/%s/%s/videos'
    IE_NAME = 'youtube:user'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheLinuxFoundation',
        'playlist_mincount': 320,
        'info_dict': {
            'id': 'UUfX55Sx5hEFjoC3cNs6mCUQ',
            'title': 'Uploads from The Linux Foundation',
            'uploader': 'The Linux Foundation',
            'uploader_id': 'TheLinuxFoundation',
        }
    }, {
        # Only available via https://www.youtube.com/c/12minuteathlete/videos
        # but not https://www.youtube.com/user/12minuteathlete/videos
        'url': 'https://www.youtube.com/c/12minuteathlete/videos',
        'playlist_mincount': 249,
        'info_dict': {
            'id': 'UUVjM-zV6_opMDx7WYxnjZiQ',
            'title': 'Uploads from 12 Minute Athlete',
            'uploader': '12 Minute Athlete',
            'uploader_id': 'the12minuteathlete',
        }
    }, {
        'url': 'ytuser:phihag',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/gametrailers',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/Pawe%C5%82Zadro%C5%BCniak',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/gametrailers',
        'only_matching': True,
    }, {
        # This channel is not available, geo restricted to JP
        'url': 'https://www.youtube.com/user/kananishinoSMEJ/videos',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Accept *url* only when no other Youtube*IE class in this module does."""
        # Don't return True if the url can be extracted with another youtube
        # extractor: this regex is too permissive and would match as well.
        for name, klass in globals().items():
            if not (name.startswith('Youtube') and name.endswith('IE')):
                continue
            if klass is cls:
                continue
            if klass.suitable(url):
                return False
        return super(YoutubeUserIE, cls).suitable(url)

    def _build_template_url(self, url, channel_id):
        """Return the videos-page URL, keeping the 'user' or 'c' path kind of *url*.

        When the URL has neither prefix, 'user' is used.
        """
        url_match = re.match(self._VALID_URL, url)
        path_kind = url_match.group('user') or 'user'
        return self._TEMPLATE_URL % (path_kind, url_match.group('id'))
3091
3092
class YoutubeLiveIE(YoutubeBaseInfoExtractor):
    """Extractor for the /live page of a user, channel or custom URL."""
    IE_DESC = 'YouTube.com live streams'
    _VALID_URL = r'(?P<base_url>https?://(?:\w+\.)?youtube\.com/(?:(?:user|channel|c)/)?(?P<id>[^/]+))/live'
    IE_NAME = 'youtube:live'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheYoungTurks/live',
        'info_dict': {
            'id': 'a48o2S1cPoo',
            'ext': 'mp4',
            'title': 'The Young Turks - Live Main Show',
            'uploader': 'The Young Turks',
            'uploader_id': 'TheYoungTurks',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
            'upload_date': '20150715',
            'license': 'Standard YouTube License',
            'description': 'md5:438179573adcdff3c97ebb1ee632b891',
            'categories': ['News & Politics'],
            'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/TheYoungTurks/live',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Resolve a /live URL to the video it shows, else to the base URL.

        The page is treated as a video when its og:type starts with 'video'
        and its 'videoId' meta value is an 11-character id. In every other
        case (including a failed download) the URL without '/live' is
        returned as a url_result.
        """
        url_match = re.match(self._VALID_URL, url)
        channel_id, base_url = url_match.group('id', 'base_url')

        webpage = self._download_webpage(url, channel_id, fatal=False)
        if not webpage:
            return self.url_result(base_url)

        page_type = self._og_search_property(
            'type', webpage, 'page type', default='')
        video_id = self._html_search_meta(
            'videoId', webpage, 'video id', default=None)
        is_video_page = (
            page_type.startswith('video')
            and video_id
            and re.match(r'^[0-9A-Za-z_-]{11}$', video_id))
        if is_video_page:
            return self.url_result(video_id, YoutubeIE.ie_key())

        return self.url_result(base_url)
3143
3144
class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
    """Match the ``/playlists`` page of a ``user``, ``channel`` or ``c`` URL.

    Only the URL pattern, names and tests are declared here; no method is
    overridden, so extraction is whatever the base class provides.
    """
    IE_NAME = 'youtube:playlists'
    IE_DESC = 'YouTube.com user/channel playlists'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/(?:user|channel|c)/(?P<id>[^/]+)/playlists'

    _TESTS = [
        {
            'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
            'playlist_mincount': 4,
            'info_dict': {
                'id': 'ThirstForScience',
                'title': 'ThirstForScience',
            },
        },
        {
            # with "Load more" button
            'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
            'playlist_mincount': 70,
            'info_dict': {
                'id': 'igorkle1',
                'title': 'Игорь Клейнер',
            },
        },
        {
            'url': 'https://www.youtube.com/channel/UCiU1dHvZObB2iP6xkJ__Icw/playlists',
            'playlist_mincount': 17,
            'info_dict': {
                'id': 'UCiU1dHvZObB2iP6xkJ__Icw',
                'title': 'Chem Player',
            },
            'skip': 'Blocked',
        },
        {
            'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
            'only_matching': True,
        },
    ]
3177
3178
class YoutubeSearchBaseInfoExtractor(YoutubePlaylistBaseInfoExtractor):
    """Playlist-style base class that overrides only the video link pattern."""
    # Matches an href to /watch?v=<11-char id>; when a title="..." attribute
    # follows inside the same tag it is captured as the 'title' group.
    # The two adjacent literals concatenate to a single pattern.
    _VIDEO_RE = (
        r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})'
        r'(?:[^"]*"[^>]+\btitle="(?P<title>[^"]+))?'
    )
3181
3182
class YoutubeSearchIE(SearchInfoExtractor, YoutubeSearchBaseInfoExtractor):
    """Keyword search (``ytsearch``) backed by the ``youtubei/v1/search`` JSON endpoint."""
    IE_DESC = 'YouTube.com searches'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    # When truthy, sent verbatim as the 'params' field of the search request.
    _SEARCH_PARAMS = None
    _TESTS = []

    def _entries(self, query, n):
        """Yield up to *n* ``url_transparent`` entries for *query*.

        Result pages are requested one after another.  Iteration ends when
        *n* entries have been yielded, when a page cannot be downloaded, when
        a page lacks the expected result lists, or when a page carries no
        continuation token for the next request.
        """
        data = {
            'context': {
                'client': {
                    'clientName': 'WEB',
                    'clientVersion': '2.20201021.03.00',
                }
            },
            'query': query,
        }
        if self._SEARCH_PARAMS:
            data['params'] = self._SEARCH_PARAMS
        total = 0
        for page_num in itertools.count(1):
            # fatal=False: a failed request ends the listing instead of raising
            search = self._download_json(
                'https://www.youtube.com/youtubei/v1/search?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
                video_id='query "%s"' % query,
                note='Downloading page %s' % page_num,
                errnote='Unable to download API page', fatal=False,
                data=json.dumps(data).encode('utf8'),
                headers={'content-type': 'application/json'})
            if not search:
                break
            # Two response layouts are tried in order: the 'contents' tree,
            # then the 'appendContinuationItemsAction' list.
            slr_contents = try_get(
                search,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not slr_contents:
                break
            isr_contents = try_get(
                slr_contents,
                lambda x: x[0]['itemSectionRenderer']['contents'],
                list)
            if not isr_contents:
                break
            for content in isr_contents:
                # Anything that is not a video renderer with an id is skipped
                if not isinstance(content, dict):
                    continue
                video = content.get('videoRenderer')
                if not isinstance(video, dict):
                    continue
                video_id = video.get('videoId')
                if not video_id:
                    continue
                title = try_get(video, lambda x: x['title']['runs'][0]['text'], compat_str)
                description = try_get(video, lambda x: x['descriptionSnippet']['runs'][0]['text'], compat_str)
                duration = parse_duration(try_get(video, lambda x: x['lengthText']['simpleText'], compat_str))
                view_count_text = try_get(video, lambda x: x['viewCountText']['simpleText'], compat_str) or ''
                # The count is locale formatted (e.g. '1,234,567 views').
                # Matching \d+ alone stops at the first separator and would
                # report 1, so the ',' / '.' group separators are captured
                # too; str_to_int (from ..utils) is relied on to drop them.
                view_count = str_to_int(self._search_regex(
                    r'^(\d[\d,.]*)', re.sub(r'\s', '', view_count_text),
                    'view count', default=None))
                uploader = try_get(video, lambda x: x['ownerText']['runs'][0]['text'], compat_str)
                total += 1
                yield {
                    '_type': 'url_transparent',
                    'ie_key': YoutubeIE.ie_key(),
                    'id': video_id,
                    'url': video_id,
                    'title': title,
                    'description': description,
                    'duration': duration,
                    'view_count': view_count,
                    'uploader': uploader,
                }
                if total == n:
                    return
            # The token for the next page is looked up in the second item
            token = try_get(
                slr_contents,
                lambda x: x[1]['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],
                compat_str)
            if not token:
                break
            data['continuation'] = token

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        return self.playlist_result(self._entries(query, n), query)
3271
3272
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Variant of YoutubeSearchIE that sends a fixed ``_SEARCH_PARAMS`` value."""
    IE_NAME = '%s:date' % YoutubeSearchIE.IE_NAME
    IE_DESC = 'YouTube.com searches, newest videos first'
    _SEARCH_KEY = 'ytsearchdate'
    # Percent-encoded form of 'CAI='; presumably the "sort by upload date"
    # filter token -- TODO confirm against the web client.
    _SEARCH_PARAMS = 'CAI%3D'
3278
3279
class YoutubeSearchURLIE(YoutubeSearchBaseInfoExtractor):
    """Turn a ``/results?search_query=...`` (or ``q=...``) URL into a playlist."""
    IE_NAME = 'youtube:search_url'
    IE_DESC = 'YouTube.com search URLs'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)'
    _TESTS = [
        {
            'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
            'playlist_mincount': 5,
            'info_dict': {
                'title': 'youtube-dl test video',
            }
        },
        {
            'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Download the results page and return its videos as a playlist.

        The decoded search query is used both as the download id and as the
        playlist title.
        """
        raw_query = re.match(self._VALID_URL, url).group('query')
        query = compat_urllib_parse_unquote_plus(raw_query)
        results_page = self._download_webpage(url, query)
        entries = self._process_page(results_page)
        return self.playlist_result(entries, playlist_title=query)
3300
3301
class YoutubeShowIE(YoutubePlaylistsBaseInfoExtractor):
    """Handle ``/show/<id>`` URLs by delegating to the show's playlists page."""
    IE_NAME = 'youtube:show'
    IE_DESC = 'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/show/(?P<id>[^?#]*)'
    _TESTS = [
        {
            'url': 'https://www.youtube.com/show/airdisasters',
            'playlist_mincount': 5,
            'info_dict': {
                'id': 'airdisasters',
                'title': 'Air Disasters',
            }
        },
    ]

    def _real_extract(self, url):
        """Run the inherited extraction on ``/show/<id>/playlists``."""
        show_id = self._match_id(url)
        playlists_url = 'https://www.youtube.com/show/%s/playlists' % show_id
        return super(YoutubeShowIE, self)._real_extract(playlists_url)
3319
3320
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
    """
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        """Extractor name derived from the subclass' ``_FEED_NAME``."""
        return 'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        """Log in before any extraction is attempted."""
        self._login()

    def _entries(self, page):
        """Yield results for every video id found in *page* and its follow-ups.

        Follow-up portions are fetched through the "load more" link for as
        long as one is present and the portion contributes unseen ids.
        """
        # The extraction process is the same as for playlists, but the regex
        # for the video ids doesn't contain an index
        seen_ids = []
        content_html = page
        load_more_html = page
        for page_num in itertools.count(1):
            found = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)

            # 'recommended' feed has infinite 'load more' and each new portion spins
            # the same videos in (sometimes) slightly different order, so we'll check
            # for unicity and break when portion has no new videos
            fresh_ids = [
                candidate for candidate in orderedSet(found)
                if candidate not in seen_ids]
            if not fresh_ids:
                break

            seen_ids.extend(fresh_ids)

            for result in self._ids_to_results(fresh_ids):
                yield result

            more_match = re.search(
                r'data-uix-load-more-href="/?(?P<more>[^"]+)"', load_more_html)
            if not more_match:
                break

            more_url = 'https://www.youtube.com/%s' % more_match.group('more')
            more = self._download_json(
                more_url, self._PLAYLIST_TITLE,
                'Downloading page #%s' % page_num,
                transform_source=uppercase_escape,
                headers=self._YOUTUBE_CLIENT_HEADERS)
            content_html = more['content_html']
            load_more_html = more['load_more_widget_html']

    def _real_extract(self, url):
        """Download ``/feed/<_FEED_NAME>`` and wrap its entries in a playlist."""
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        page = self._download_webpage(feed_url, self._PLAYLIST_TITLE)
        return self.playlist_result(
            self._entries(page), playlist_title=self._PLAYLIST_TITLE)
3373
3374
class YoutubeWatchLaterIE(YoutubePlaylistIE):
    """Extractor for the "watch later" list, addressed as playlist ``WL``."""
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:feed/watch_later|(?:playlist|watch)\?(?:.+&)?list=WL)|:ytwatchlater'

    _TESTS = [
        {
            'url': 'https://www.youtube.com/playlist?list=WL',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?v=bCNU9TrbiRk&index=1&list=WL',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Return the single-video result if there is one, else the ``WL`` playlist."""
        _ignored, single_video = self._check_download_just_video(url, 'WL')
        if single_video:
            return single_video
        _ignored, whole_list = self._extract_playlist('WL')
        return whole_list
3394
3395
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Resolve the logged-in user's favourites to the playlist behind them."""
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
    _LOGIN_REQUIRED = True

    def _real_extract(self, url):
        """Fetch ``/my_favorites`` and hand the first ``list=`` id to YoutubePlaylist."""
        favourites_page = self._download_webpage(
            'https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        favourites_list_id = self._search_regex(
            r'list=(.+?)["&]', favourites_page, 'favourites playlist id')
        return self.url_result(favourites_list_id, 'YoutubePlaylist')
3406
3407
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor configured for ``/feed/recommended``."""
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = 'Youtube Recommended videos'
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/recommended|:ytrec(?:ommended)?'
3413
3414
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor configured for ``/feed/subscriptions``."""
    # The shortcut accepted by _VALID_URL starts with a colon (':ytsubs'),
    # so the description names it that way, like the other feed extractors.
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = 'Youtube Subscriptions'
3420
3421
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor configured for ``/feed/history``."""
    _FEED_NAME = 'history'
    _PLAYLIST_TITLE = 'Youtube History'
    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/history|:ythistory'
3427
3428
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch watch/attribution_link URLs that carry no video id.

    Such URLs cannot be extracted; ``_real_extract`` always raises an
    expected error with a hint about quoting the URL.
    """
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [
        {
            'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?feature=foo',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?hl=en-GB',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?t=2372',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Always raise: the matched URL contains no video id."""
        quoting_hint = (
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like  youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply  youtube-dl BaW_jenozKc  .')
        raise ExtractorError(quoting_hint, expected=True)
3476
3477
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch watch URLs whose ``v=`` id is only 1 to 10 characters long."""
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [
        {
            'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Always raise an expected error naming the incomplete id and the URL."""
        partial_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (partial_id, url)
        raise ExtractorError(message, expected=True)