3 from __future__ import unicode_literals
14 from .common import InfoExtractor, SearchInfoExtractor
15 from ..jsinterp import JSInterpreter
16 from ..swfinterp import SWFInterpreter
17 from ..compat import (
22 compat_urllib_parse_unquote,
23 compat_urllib_parse_unquote_plus,
24 compat_urllib_parse_urlencode,
25 compat_urllib_parse_urlparse,
37 get_element_by_attribute,
60 class YoutubeBaseInfoExtractor(InfoExtractor):
61 """Provide base functions for Youtube extractors"""
62 _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
63 _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'
65 _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
66 _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
67 _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'
69 _NETRC_MACHINE = 'youtube'
70 # If True it will raise an error if no login info is provided
71 _LOGIN_REQUIRED = False
73 _PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL|OLAK5uy_)[0-9A-Za-z-_]{10,}'
75 def _set_language(self):
77 '.youtube.com', 'PREF', 'f1=50000000&hl=en',
78 # YouTube sets the expire time to about two months
79 expire_time=time.time() + 2 * 30 * 24 * 3600)
81 def _ids_to_results(self, ids):
83 self.url_result(vid_id, 'Youtube', video_id=vid_id)
88 Attempt to log in to YouTube.
89 True is returned if successful or skipped.
90 False is returned if login failed.
92 If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
94 username, password = self._get_login_info()
95 # No authentication to be performed
97 if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
98 raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
101 login_page = self._download_webpage(
102 self._LOGIN_URL, None,
103 note='Downloading login page',
104 errnote='unable to fetch login page', fatal=False)
105 if login_page is False:
108 login_form = self._hidden_inputs(login_page)
110 def req(url, f_req, note, errnote):
111 data = login_form.copy()
114 'checkConnection': 'youtube',
115 'checkedDomains': 'youtube',
117 'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
118 'f.req': json.dumps(f_req),
119 'flowName': 'GlifWebSignIn',
120 'flowEntry': 'ServiceLogin',
121 # TODO: reverse actual botguard identifier generation algo
122 'bgRequest': '["identifier",""]',
124 return self._download_json(
125 url, None, note=note, errnote=errnote,
126 transform_source=lambda s: re.sub(r'^[^[]*', '', s),
128 data=urlencode_postdata(data), headers={
129 'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
130 'Google-Accounts-XSRF': 1,
134 self._downloader.report_warning(message)
138 None, [], None, 'US', None, None, 2, False, True,
142 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
144 1, [None, None, []], None, None, None, True
149 lookup_results = req(
150 self._LOOKUP_URL, lookup_req,
151 'Looking up account info', 'Unable to look up account info')
153 if lookup_results is False:
156 user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
158 warn('Unable to extract user hash')
163 None, 1, None, [1, None, None, None, [password, None, True]],
165 None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
166 1, [None, None, []], None, None, None, True
169 challenge_results = req(
170 self._CHALLENGE_URL, challenge_req,
171 'Logging in', 'Unable to log in')
173 if challenge_results is False:
176 login_res = try_get(challenge_results, lambda x: x[0][5], list)
178 login_msg = try_get(login_res, lambda x: x[5], compat_str)
180 'Unable to login: %s' % 'Invalid password'
181 if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
184 res = try_get(challenge_results, lambda x: x[0][-1], list)
186 warn('Unable to extract result entry')
189 login_challenge = try_get(res, lambda x: x[0][0], list)
191 challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
192 if challenge_str == 'TWO_STEP_VERIFICATION':
193 # SEND_SUCCESS - TFA code has been successfully sent to phone
194 # QUOTA_EXCEEDED - reached the limit of TFA codes
195 status = try_get(login_challenge, lambda x: x[5], compat_str)
196 if status == 'QUOTA_EXCEEDED':
197 warn('Exceeded the limit of TFA codes, try later')
200 tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
202 warn('Unable to extract TL')
205 tfa_code = self._get_tfa_info('2-step verification code')
209 'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
210 '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
213 tfa_code = remove_start(tfa_code, 'G-')
216 user_hash, None, 2, None,
218 9, None, None, None, None, None, None, None,
219 [None, tfa_code, True, 2]
223 self._TFA_URL.format(tl), tfa_req,
224 'Submitting TFA code', 'Unable to submit TFA code')
226 if tfa_results is False:
229 tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
231 tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
233 'Unable to finish TFA: %s' % 'Invalid TFA code'
234 if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
237 check_cookie_url = try_get(
238 tfa_results, lambda x: x[0][-1][2], compat_str)
241 'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
242 'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
243 'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
245 challenge = CHALLENGES.get(
247 '%s returned error %s.' % (self.IE_NAME, challenge_str))
248 warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
251 check_cookie_url = try_get(res, lambda x: x[2], compat_str)
253 if not check_cookie_url:
254 warn('Unable to extract CheckCookie URL')
257 check_cookie_results = self._download_webpage(
258 check_cookie_url, None, 'Checking cookie', fatal=False)
260 if check_cookie_results is False:
263 if 'https://myaccount.google.com/' not in check_cookie_results:
264 warn('Unable to log in')
269 def _download_webpage_handle(self, *args, **kwargs):
270 query = kwargs.get('query', {}).copy()
271 query['disable_polymer'] = 'true'
272 kwargs['query'] = query
273 return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
274 *args, **compat_kwargs(kwargs))
276 def _real_initialize(self):
277 if self._downloader is None:
280 if not self._login():
284 class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
285 # Extract entries from page with "Load more" button
286 def _entries(self, page, playlist_id):
287 more_widget_html = content_html = page
288 for page_num in itertools.count(1):
289 for entry in self._process_page(content_html):
292 mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
298 while count <= retries:
300 # Downloading page may result in intermittent 5xx HTTP error
301 # that is usually worked around with a retry
302 more = self._download_json(
303 'https://youtube.com/%s' % mobj.group('more'), playlist_id,
304 'Downloading page #%s%s'
305 % (page_num, ' (retry #%d)' % count if count else ''),
306 transform_source=uppercase_escape)
308 except ExtractorError as e:
309 if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503):
315 content_html = more['content_html']
316 if not content_html.strip():
317 # Some webpages show a "Load more" button but they don't
320 more_widget_html = more['load_more_widget_html']
323 class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
324 def _process_page(self, content):
325 for video_id, video_title in self.extract_videos_from_page(content):
326 yield self.url_result(video_id, 'Youtube', video_id, video_title)
328 def extract_videos_from_page_impl(self, video_re, page, ids_in_page, titles_in_page):
329 for mobj in re.finditer(video_re, page):
330 # The link with index 0 is not the first video of the playlist (not sure if still actual)
331 if 'index' in mobj.groupdict() and mobj.group('id') == '0':
333 video_id = mobj.group('id')
334 video_title = unescapeHTML(
335 mobj.group('title')) if 'title' in mobj.groupdict() else None
337 video_title = video_title.strip()
338 if video_title == '► Play all':
341 idx = ids_in_page.index(video_id)
342 if video_title and not titles_in_page[idx]:
343 titles_in_page[idx] = video_title
345 ids_in_page.append(video_id)
346 titles_in_page.append(video_title)
348 def extract_videos_from_page(self, page):
351 self.extract_videos_from_page_impl(
352 self._VIDEO_RE, page, ids_in_page, titles_in_page)
353 return zip(ids_in_page, titles_in_page)
356 class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
357 def _process_page(self, content):
358 for playlist_id in orderedSet(re.findall(
359 r'<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*><a[^>]+href="/?playlist\?list=([0-9A-Za-z-_]{10,})"',
361 yield self.url_result(
362 'https://www.youtube.com/playlist?list=%s' % playlist_id, 'YoutubePlaylist')
364 def _real_extract(self, url):
365 playlist_id = self._match_id(url)
366 webpage = self._download_webpage(url, playlist_id)
367 title = self._og_search_title(webpage, fatal=False)
368 return self.playlist_result(self._entries(webpage, playlist_id), playlist_id, title)
371 class YoutubeIE(YoutubeBaseInfoExtractor):
372 IE_DESC = 'YouTube.com'
373 _VALID_URL = r"""(?x)^
375 (?:https?://|//) # http(s):// or protocol-independent URL
376 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|
377 (?:www\.)?deturl\.com/www\.youtube\.com/|
378 (?:www\.)?pwnyoutube\.com/|
379 (?:www\.)?hooktube\.com/|
380 (?:www\.)?yourepeat\.com/|
381 tube\.majestyc\.net/|
382 # Invidious instances taken from https://github.com/omarroth/invidious/wiki/Invidious-Instances
383 (?:(?:www|dev)\.)?invidio\.us/|
384 (?:(?:www|no)\.)?invidiou\.sh/|
385 (?:(?:www|fi|de)\.)?invidious\.snopyta\.org/|
386 (?:www\.)?invidious\.kabi\.tk/|
387 (?:www\.)?invidious\.enkirton\.net/|
388 (?:www\.)?invidious\.13ad\.de/|
389 (?:www\.)?invidious\.mastodon\.host/|
390 (?:www\.)?tube\.poal\.co/|
391 (?:www\.)?vid\.wxzm\.sx/|
392 youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
393 (?:.*?\#/)? # handle anchor (#/) redirect urls
394 (?: # the various things that can precede the ID:
395 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
396 |(?: # or the v= param in all its forms
397 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
398 (?:\?|\#!?) # the params delimiter ? or # or #!
399 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&v=V36LpHqtcDY)
404 youtu\.be| # just youtu.be/xxxx
405 vid\.plus| # or vid.plus/xxxx
406 zwearz\.com/watch| # or zwearz.com/watch/xxxx
408 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
410 )? # all until now is optional -> you can pass the naked ID
411 ([0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
414 %(playlist_id)s| # combined list/video URLs are handled by the playlist IE
415 WL # WL are handled by the watch later IE
418 (?(1).+)? # if we found the ID, everything can follow
419 $""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
420 _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
422 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
423 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
424 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
425 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
426 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
427 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
428 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
429 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
430 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
431 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
432 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
433 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
434 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
435 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
436 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
437 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
438 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
439 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
443 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
444 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
445 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
446 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
447 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
448 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
449 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
451 # Apple HTTP Live Streaming
452 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
453 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
454 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
455 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
456 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
457 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
458 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
459 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
462 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
463 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
464 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
465 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
466 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
467 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
468 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
469 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
470 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
471 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
472 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
473 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
476 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
477 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
478 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
479 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
480 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
481 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
482 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
485 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
486 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
487 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
488 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
489 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
490 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
491 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
492 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
493 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
494 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
495 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
496 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
497 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
498 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
499 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
500 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
501 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
502 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
503 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
504 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
505 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
506 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
509 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
510 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
512 # Dash webm audio with opus inside
513 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
514 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
515 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
518 '_rtmp': {'protocol': 'rtmp'},
520 # av01 video only formats sometimes served with "unknown" codecs
521 '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
522 '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
523 '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
524 '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
526 _SUBTITLE_FORMATS = ('srv1', 'srv2', 'srv3', 'ttml', 'vtt')
533 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
537 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
538 'uploader': 'Philipp Hagemeister',
539 'uploader_id': 'phihag',
540 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
541 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
542 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
543 'upload_date': '20121002',
544 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
545 'categories': ['Science & Technology'],
546 'tags': ['youtube-dl'],
550 'dislike_count': int,
556 'url': 'https://www.youtube.com/watch?v=UxxajLWwzqY',
557 'note': 'Test generic use_cipher_signature video (#897)',
561 'upload_date': '20120506',
562 'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
563 'alt_title': 'I Love It (feat. Charli XCX)',
564 'description': 'md5:f3ceb5ef83a08d95b9d146f973157cc8',
565 'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
566 'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
567 'iconic ep', 'iconic', 'love', 'it'],
569 'uploader': 'Icona Pop',
570 'uploader_id': 'IconaPop',
571 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IconaPop',
572 'creator': 'Icona Pop',
573 'track': 'I Love It (feat. Charli XCX)',
574 'artist': 'Icona Pop',
578 'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ',
579 'note': 'Test VEVO video with age protection (#956)',
583 'upload_date': '20130703',
584 'title': 'Justin Timberlake - Tunnel Vision (Official Music Video) (Explicit)',
585 'alt_title': 'Tunnel Vision',
586 'description': 'md5:07dab3356cde4199048e4c7cd93471e1',
588 'uploader': 'justintimberlakeVEVO',
589 'uploader_id': 'justintimberlakeVEVO',
590 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/justintimberlakeVEVO',
591 'creator': 'Justin Timberlake',
592 'track': 'Tunnel Vision',
593 'artist': 'Justin Timberlake',
598 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
599 'note': 'Embed-only video (#1746)',
603 'upload_date': '20120608',
604 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
605 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
606 'uploader': 'SET India',
607 'uploader_id': 'setindia',
608 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
613 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=UxxajLWwzqY',
614 'note': 'Use the first video ID in the URL',
618 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
619 'uploader': 'Philipp Hagemeister',
620 'uploader_id': 'phihag',
621 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
622 'upload_date': '20121002',
623 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
624 'categories': ['Science & Technology'],
625 'tags': ['youtube-dl'],
629 'dislike_count': int,
632 'skip_download': True,
636 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
637 'note': '256k DASH audio (format 141) via DASH manifest',
641 'upload_date': '20121002',
642 'uploader_id': '8KVIDEO',
643 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
645 'uploader': '8KVIDEO',
646 'title': 'UHDTV TEST 8K VIDEO.mp4'
649 'youtube_include_dash_manifest': True,
652 'skip': 'format 141 not served anymore',
654 # DASH manifest with encrypted signature
656 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
660 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
661 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
663 'uploader': 'AfrojackVEVO',
664 'uploader_id': 'AfrojackVEVO',
665 'upload_date': '20131011',
668 'youtube_include_dash_manifest': True,
669 'format': '141/bestaudio[ext=m4a]',
672 # JS player signature function name containing $
674 'url': 'https://www.youtube.com/watch?v=nfWlot6h_JM',
678 'title': 'Taylor Swift - Shake It Off',
679 'description': 'md5:bec2185232c05479482cb5a9b82719bf',
681 'uploader': 'TaylorSwiftVEVO',
682 'uploader_id': 'TaylorSwiftVEVO',
683 'upload_date': '20140818',
684 'creator': 'Taylor Swift',
687 'youtube_include_dash_manifest': True,
688 'format': '141/bestaudio[ext=m4a]',
693 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
698 'upload_date': '20100909',
699 'uploader': 'Amazing Atheist',
700 'uploader_id': 'TheAmazingAtheist',
701 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
702 'title': 'Burning Everyone\'s Koran',
703 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
706 # Normal age-gate video (No vevo, embed allowed)
708 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
712 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
713 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
715 'uploader': 'The Witcher',
716 'uploader_id': 'WitcherGame',
717 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
718 'upload_date': '20140605',
722 # Age-gate video with encrypted signature
724 'url': 'https://www.youtube.com/watch?v=6kLq3WMV1nU',
728 'title': 'Dedication To My Ex (Miss That) (Lyric Video)',
729 'description': 'md5:33765bb339e1b47e7e72b5490139bb41',
731 'uploader': 'LloydVEVO',
732 'uploader_id': 'LloydVEVO',
733 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/LloydVEVO',
734 'upload_date': '20110629',
738 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
739 # YouTube Red ad is not captured for creator
741 'url': '__2ABJjxzNo',
746 'upload_date': '20100430',
747 'uploader_id': 'deadmau5',
748 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
749 'creator': 'deadmau5',
750 'description': 'md5:12c56784b8032162bb936a5f76d55360',
751 'uploader': 'deadmau5',
752 'title': 'Deadmau5 - Some Chords (HD)',
753 'alt_title': 'Some Chords',
755 'expected_warnings': [
756 'DASH manifest missing',
759 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
761 'url': 'lqQg6PlCWgI',
766 'upload_date': '20150827',
767 'uploader_id': 'olympic',
768 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
769 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
770 'uploader': 'Olympic',
771 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
774 'skip_download': 'requires avconv',
779 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
783 'stretched_ratio': 16 / 9.,
785 'upload_date': '20110310',
786 'uploader_id': 'AllenMeow',
787 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
788 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
790 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
793 # url_encoded_fmt_stream_map is empty string
795 'url': 'qEJwOuvDf7I',
799 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
801 'upload_date': '20150404',
802 'uploader_id': 'spbelect',
803 'uploader': 'Наблюдатели Петербурга',
806 'skip_download': 'requires avconv',
808 'skip': 'This live event has ended.',
810 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
812 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
816 'title': 'md5:7b81415841e02ecd4313668cde88737a',
817 'description': 'md5:116377fd2963b81ec4ce64b542173306',
819 'upload_date': '20150625',
820 'uploader_id': 'dorappi2000',
821 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
822 'uploader': 'dorappi2000',
823 'formats': 'mincount:31',
825 'skip': 'not actual anymore',
827 # DASH manifest with segment_list
829 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
830 'md5': '8ce563a1d667b599d21064e982ab9e31',
834 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
835 'uploader': 'Airtek',
836 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
837 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
838 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
841 'youtube_include_dash_manifest': True,
842 'format': '135', # bestvideo
844 'skip': 'This live event has ended.',
847 # Multifeed videos (multiple cameras), URL is for Main Camera
848 'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
851 'title': 'teamPGP: Rocket League Noob Stream',
852 'description': 'md5:dc7872fb300e143831327f1bae3af010',
858 'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
859 'description': 'md5:dc7872fb300e143831327f1bae3af010',
861 'upload_date': '20150721',
862 'uploader': 'Beer Games Beer',
863 'uploader_id': 'beergamesbeer',
864 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
865 'license': 'Standard YouTube License',
871 'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
872 'description': 'md5:dc7872fb300e143831327f1bae3af010',
874 'upload_date': '20150721',
875 'uploader': 'Beer Games Beer',
876 'uploader_id': 'beergamesbeer',
877 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
878 'license': 'Standard YouTube License',
884 'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
885 'description': 'md5:dc7872fb300e143831327f1bae3af010',
887 'upload_date': '20150721',
888 'uploader': 'Beer Games Beer',
889 'uploader_id': 'beergamesbeer',
890 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
891 'license': 'Standard YouTube License',
897 'title': 'teamPGP: Rocket League Noob Stream (zim)',
898 'description': 'md5:dc7872fb300e143831327f1bae3af010',
900 'upload_date': '20150721',
901 'uploader': 'Beer Games Beer',
902 'uploader_id': 'beergamesbeer',
903 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
904 'license': 'Standard YouTube License',
908 'skip_download': True,
910 'skip': 'This video is not available.',
913 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
914 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
917 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
920 'skip': 'Not multifeed anymore',
923 'url': 'https://vid.plus/FlRa-iH7PGw',
924 'only_matching': True,
927 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
928 'only_matching': True,
931 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
932 # Also tests cut-off URL expansion in video description (see
933 # https://github.com/ytdl-org/youtube-dl/issues/1892,
934 # https://github.com/ytdl-org/youtube-dl/issues/8164)
935 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
939 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
940 'alt_title': 'Dark Walk - Position Music',
941 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
943 'upload_date': '20151119',
944 'uploader_id': 'IronSoulElf',
945 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
946 'uploader': 'IronSoulElf',
947 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
948 'track': 'Dark Walk - Position Music',
949 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
950 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
953 'skip_download': True,
957 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
958 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
959 'only_matching': True,
962 # Video with yt:stretch=17:0
963 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
967 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
968 'description': 'md5:ee18a25c350637c8faff806845bddee9',
969 'upload_date': '20151107',
970 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
971 'uploader': 'CH GAMER DROID',
974 'skip_download': True,
976 'skip': 'This video does not exist.',
979 # Video licensed under Creative Commons
980 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
984 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
985 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
987 'upload_date': '20150127',
988 'uploader_id': 'BerkmanCenter',
989 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
990 'uploader': 'The Berkman Klein Center for Internet & Society',
991 'license': 'Creative Commons Attribution license (reuse allowed)',
994 'skip_download': True,
998 # Channel-like uploader_url
999 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1001 'id': 'eQcmzGIKrzg',
1003 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
1004 'description': 'md5:dda0d780d5a6e120758d1711d062a867',
1006 'upload_date': '20151119',
1007 'uploader': 'Bernie Sanders',
1008 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1009 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
1010 'license': 'Creative Commons Attribution license (reuse allowed)',
1013 'skip_download': True,
1017 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;v=V36LpHqtcDY',
1018 'only_matching': True,
1021 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
1022 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1023 'only_matching': True,
1026 # Rental video preview
1027 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1029 'id': 'uGpuVWrhIzE',
1031 'title': 'Piku - Trailer',
1032 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1033 'upload_date': '20150811',
1034 'uploader': 'FlixMatrix',
1035 'uploader_id': 'FlixMatrixKaravan',
1036 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
1037 'license': 'Standard YouTube License',
1040 'skip_download': True,
1042 'skip': 'This video is not available.',
1045 # YouTube Red video with episode data
1046 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1048 'id': 'iqKdEhx-dD4',
1050 'title': 'Isolation - Mind Field (Ep 1)',
1051 'description': 'md5:46a29be4ceffa65b92d277b93f463c0f',
1053 'upload_date': '20170118',
1054 'uploader': 'Vsauce',
1055 'uploader_id': 'Vsauce',
1056 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
1057 'series': 'Mind Field',
1059 'episode_number': 1,
1062 'skip_download': True,
1064 'expected_warnings': [
1065 'Skipping DASH manifest',
1069 # The following content has been identified by the YouTube community
1070 # as inappropriate or offensive to some audiences.
1071 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1073 'id': '6SJNVb0GnPI',
1075 'title': 'Race Differences in Intelligence',
1076 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1078 'upload_date': '20140124',
1079 'uploader': 'New Century Foundation',
1080 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1081 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1084 'skip_download': True,
1089 'url': '1t24XAntNCY',
1090 'only_matching': True,
1093 # geo restricted to JP
1094 'url': 'sJL6WA-aGkQ',
1095 'only_matching': True,
1098 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
1099 'only_matching': True,
1102 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1103 'only_matching': True,
1107 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1108 'only_matching': True,
1111 # Video with unsupported adaptive stream type formats
1112 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1114 'id': 'Z4Vy8R84T1U',
1116 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1117 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1119 'upload_date': '20130923',
1120 'uploader': 'Amelia Putri Harwita',
1121 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1122 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1123 'formats': 'maxcount:10',
1126 'skip_download': True,
1127 'youtube_include_dash_manifest': False,
1131 # Youtube Music Auto-generated description
1132 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1134 'id': 'MgNrAu2pzNs',
1136 'title': 'Voyeur Girl',
1137 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1138 'upload_date': '20190312',
1139 'uploader': 'Various Artists - Topic',
1140 'uploader_id': 'UCVWKBi1ELZn0QX2CBLSkiyw',
1141 'artist': 'Stephen',
1142 'track': 'Voyeur Girl',
1143 'album': 'it\'s too much love to know my dear',
1144 'release_date': '20190313',
1145 'release_year': 2019,
1148 'skip_download': True,
1152 # Youtube Music Auto-generated description
1153 # Retrieve 'artist' field from 'Artist:' in video description
1154 # when it is present on youtube music video
1155 'url': 'https://www.youtube.com/watch?v=k0jLE7tTwjY',
1157 'id': 'k0jLE7tTwjY',
1159 'title': 'Latch Feat. Sam Smith',
1160 'description': 'md5:3cb1e8101a7c85fcba9b4fb41b951335',
1161 'upload_date': '20150110',
1162 'uploader': 'Various Artists - Topic',
1163 'uploader_id': 'UCNkEcmYdjrH4RqtNgh7BZ9w',
1164 'artist': 'Disclosure',
1165 'track': 'Latch Feat. Sam Smith',
1166 'album': 'Latch Featuring Sam Smith',
1167 'release_date': '20121008',
1168 'release_year': 2012,
1171 'skip_download': True,
1175 # Youtube Music Auto-generated description
1176 # handle multiple artists on youtube music video
1177 'url': 'https://www.youtube.com/watch?v=74qn0eJSjpA',
1179 'id': '74qn0eJSjpA',
1181 'title': 'Eastside',
1182 'description': 'md5:290516bb73dcbfab0dcc4efe6c3de5f2',
1183 'upload_date': '20180710',
1184 'uploader': 'Benny Blanco - Topic',
1185 'uploader_id': 'UCzqz_ksRu_WkIzmivMdIS7A',
1186 'artist': 'benny blanco, Halsey, Khalid',
1187 'track': 'Eastside',
1188 'album': 'Eastside',
1189 'release_date': '20180713',
1190 'release_year': 2018,
1193 'skip_download': True,
1197 # Youtube Music Auto-generated description
1198 # handle youtube music video with release_year and no release_date
1199 'url': 'https://www.youtube.com/watch?v=-hcAI0g-f5M',
1201 'id': '-hcAI0g-f5M',
1203 'title': 'Put It On Me',
1204 'description': 'md5:93c55acc682ae7b0c668f2e34e1c069e',
1205 'upload_date': '20180426',
1206 'uploader': 'Matt Maeson - Topic',
1207 'uploader_id': 'UCnEkIGqtGcQMLk73Kp-Q5LQ',
1208 'artist': 'Matt Maeson',
1209 'track': 'Put It On Me',
1210 'album': 'The Hearse',
1211 'release_date': None,
1212 'release_year': 2018,
1215 'skip_download': True,
    def __init__(self, *args, **kwargs):
        """Initialize the extractor and set up the per-run player cache."""
        super(YoutubeIE, self).__init__(*args, **kwargs)
        # Maps (player_url, signature layout id) -> deciphering callable so
        # each player binary is downloaded and parsed at most once per run
        # (see _decrypt_signature below).
        self._player_cache = {}
1224 def report_video_info_webpage_download(self, video_id):
1225 """Report attempt to download video info webpage."""
1226 self.to_screen('%s: Downloading video info webpage' % video_id)
1228 def report_information_extraction(self, video_id):
1229 """Report attempt to extract video information."""
1230 self.to_screen('%s: Extracting video information' % video_id)
1232 def report_unavailable_format(self, video_id, format):
1233 """Report extracted video URL."""
1234 self.to_screen('%s: Format %s not available' % (video_id, format))
1236 def report_rtmp_download(self):
1237 """Indicate the download will use the RTMP protocol."""
1238 self.to_screen('RTMP download detected')
1240 def _signature_cache_id(self, example_sig):
1241 """ Return a string representation of a signature """
1242 return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
    def _extract_signature_function(self, video_id, player_url, example_sig):
        # Build a signature-deciphering callable for the player referenced by
        # player_url, caching the computed character permutation on disk so
        # the player does not have to be re-downloaded/re-parsed every run.
            r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|(?:/[a-z]{2,3}_[A-Z]{2})?/base)?\.(?P<ext>[a-z]+)$',
            raise ExtractorError('Cannot identify player %r' % player_url)
        player_type = id_m.group('ext')  # player file extension: 'js' or 'swf'
        player_id = id_m.group('id')
        # Read from filesystem cache
        func_id = '%s_%s_%s' % (
            player_type, player_id, self._signature_cache_id(example_sig))
        # func_id doubles as a cache filename component, so it must not
        # contain path separators.
        assert os.path.basename(func_id) == func_id
        cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
        if cache_spec is not None:
            # Cached spec is a list of source indices: the deciphered
            # signature is the input characters re-ordered by those indices.
            return lambda s: ''.join(s[i] for i in cache_spec)
            'Downloading player %s' % player_url
            if self._downloader.params.get('verbose') else
            'Downloading %s player %s' % (player_type, player_id)
        if player_type == 'js':
            code = self._download_webpage(
                player_url, video_id,
                errnote='Download of %s failed' % player_url)
            res = self._parse_sig_js(code)
        elif player_type == 'swf':
            urlh = self._request_webpage(
                player_url, video_id,
                errnote='Download of %s failed' % player_url)
            res = self._parse_sig_swf(code)
            assert False, 'Invalid player type %r' % player_type
        # Probe the deciphering function with a string of distinct characters
        # to record the permutation it performs, then persist that spec.
        test_string = ''.join(map(compat_chr, range(len(example_sig))))
        cache_res = res(test_string)
        cache_spec = [ord(c) for c in cache_res]
        self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
    def _print_sig_code(self, func, example_sig):
        # Debug helper: reconstruct readable Python source equivalent to the
        # extracted signature function and print it (see the
        # 'youtube_print_sig_code' option checked in _decrypt_signature).
        def gen_sig_code(idxs):
            def _genslice(start, end, step):
                # Render a Python slice expression, omitting defaults.
                starts = '' if start == 0 else str(start)
                ends = (':%d' % (end + step)) if end + step >= 0 else ':'
                steps = '' if step == 1 else (':%d' % step)
                return 's[%s%s%s]' % (starts, ends, steps)
            # Quelch pyflakes warnings - start will be set when step is set
            start = '(Never used)'
            # Walk consecutive index pairs, coalescing arithmetic runs into
            # slice expressions and emitting single-index accesses otherwise.
            for i, prev in zip(idxs[1:], idxs[:-1]):
                if step is not None:
                    if i - prev == step:
                        yield _genslice(start, prev, step)
                if i - prev in [-1, 1]:
                    yield 's[%d]' % prev
                yield _genslice(start, i, step)
        # Record the permutation the function performs on a probe string of
        # distinct characters, then pretty-print it as index expressions.
        test_string = ''.join(map(compat_chr, range(len(example_sig))))
        cache_res = func(test_string)
        cache_spec = [ord(c) for c in cache_res]
        expr_code = ' + '.join(gen_sig_code(cache_spec))
        signature_id_tuple = '(%s)' % (
            ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
        code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
                ' return %s\n') % (signature_id_tuple, expr_code)
        self.to_screen('Extracted signature function:\n' + code)
1329 def _parse_sig_js(self, jscode):
1330 funcname = self._search_regex(
1331 (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1332 r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1333 r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
1335 r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1336 r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
1337 r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1338 r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1339 r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1340 r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1341 r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1342 r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
1343 jscode, 'Initial JS player signature function name', group='sig')
1345 jsi = JSInterpreter(jscode)
1346 initial_function = jsi.extract_function(funcname)
1347 return lambda s: initial_function([s])
1349 def _parse_sig_swf(self, file_contents):
1350 swfi = SWFInterpreter(file_contents)
1351 TARGET_CLASSNAME = 'SignatureDecipher'
1352 searched_class = swfi.extract_class(TARGET_CLASSNAME)
1353 initial_function = swfi.extract_function(searched_class, 'decipher')
1354 return lambda s: initial_function([s])
    def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
        """Turn the encrypted s field into a working signature"""
        if player_url is None:
            raise ExtractorError('Cannot decrypt signature without player_url')
        # Normalize protocol-relative or path-only player URLs to absolute.
        if player_url.startswith('//'):
            player_url = 'https:' + player_url
        elif not re.match(r'https?://', player_url):
            player_url = compat_urlparse.urljoin(
                'https://www.youtube.com', player_url)
            # Cache key combines the player and the signature's layout, since
            # different layouts may decipher differently.
            player_id = (player_url, self._signature_cache_id(s))
            if player_id not in self._player_cache:
                func = self._extract_signature_function(
                    video_id, player_url, s
                self._player_cache[player_id] = func
            func = self._player_cache[player_id]
            if self._downloader.params.get('youtube_print_sig_code'):
                self._print_sig_code(func, s)
        except Exception as e:
            # Surface the full traceback; signature extraction breaks whenever
            # YouTube changes the player code.
            tb = traceback.format_exc()
            raise ExtractorError(
                'Signature extraction failed: ' + tb, cause=e)
    def _get_subtitles(self, video_id, webpage):
        # Fetch the list of manually-created subtitle tracks from the legacy
        # timedtext API and build a {lang_code: [format dicts]} mapping.
            subs_doc = self._download_xml(
                'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
                video_id, note=False)
        except ExtractorError as err:
            self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err))
        for track in subs_doc.findall('track'):
            lang = track.attrib['lang_code']
            if lang in sub_lang_list:
            # One entry per supported subtitle format for this language.
            for ext in self._SUBTITLE_FORMATS:
                params = compat_urllib_parse_urlencode({
                    # Track names may contain non-ASCII text, hence the
                    # explicit utf-8 encode before urlencoding.
                    'name': track.attrib['name'].encode('utf-8'),
                sub_formats.append({
                    'url': 'https://www.youtube.com/api/timedtext?' + params,
            sub_lang_list[lang] = sub_formats
        if not sub_lang_list:
            self._downloader.report_warning('video doesn\'t have subtitles')
        return sub_lang_list
    def _get_ytplayer_config(self, video_id, webpage):
        # Locate the inline ytplayer.config JSON blob in the watch page and
        # parse it; yields None when the blob is absent or unparseable.
            # User data may contain arbitrary character sequences that may affect
            # JSON extraction with regex, e.g. when '};' is contained the second
            # regex won't capture the whole JSON. Yet working around by trying more
            # concrete regex first keeping in mind proper quoted string handling
            # to be implemented in future that will replace this workaround (see
            # https://github.com/ytdl-org/youtube-dl/issues/7468,
            # https://github.com/ytdl-org/youtube-dl/pull/7599)
            r';ytplayer\.config\s*=\s*({.+?});ytplayer',
            r';ytplayer\.config\s*=\s*({.+?});',
        config = self._search_regex(
            patterns, webpage, 'ytplayer.config', default=None)
            # uppercase_escape decodes \UXXXXXXXX-style escapes before the
            # JSON parse.
            return self._parse_json(
                uppercase_escape(config), video_id, fatal=False)
    def _get_automatic_captions(self, video_id, webpage):
        """We need the webpage for getting the captions url, pass it as an
        argument to speed up the process."""
        self.to_screen('%s: Looking for automatic captions' % video_id)
        player_config = self._get_ytplayer_config(video_id, webpage)
        err_msg = 'Couldn\'t find automatic captions for %s' % video_id
        if not player_config:
            self._downloader.report_warning(err_msg)
            # Legacy flow: a 'ttsurl' in the player args points at the
            # timedtext service listing available auto-caption targets.
            args = player_config['args']
            caption_url = args.get('ttsurl')
                timestamp = args['timestamp']
                # We get the available subtitles
                list_params = compat_urllib_parse_urlencode({
                list_url = caption_url + '&' + list_params
                caption_list = self._download_xml(list_url, video_id)
                original_lang_node = caption_list.find('track')
                if original_lang_node is None:
                    self._downloader.report_warning('Video doesn\'t have automatic captions')
                original_lang = original_lang_node.attrib['lang_code']
                caption_kind = original_lang_node.attrib.get('kind', '')
                # One URL per (target language, subtitle format) pair.
                for lang_node in caption_list.findall('target'):
                    sub_lang = lang_node.attrib['lang_code']
                    for ext in self._SUBTITLE_FORMATS:
                        params = compat_urllib_parse_urlencode({
                            'lang': original_lang,
                            'kind': caption_kind,
                        sub_formats.append({
                            'url': caption_url + '&' + params,
                    sub_lang_list[sub_lang] = sub_formats
                return sub_lang_list

            def make_captions(sub_url, sub_langs):
                # Derive per-language caption URLs by rewriting the query
                # string of a single base caption URL.
                parsed_sub_url = compat_urllib_parse_urlparse(sub_url)
                caption_qs = compat_parse_qs(parsed_sub_url.query)
                for sub_lang in sub_langs:
                    for ext in self._SUBTITLE_FORMATS:
                            'tlang': [sub_lang],
                        sub_url = compat_urlparse.urlunparse(parsed_sub_url._replace(
                            query=compat_urllib_parse_urlencode(caption_qs, True)))
                        sub_formats.append({
                    captions[sub_lang] = sub_formats

            # New captions format as of 22.06.2017
            player_response = args.get('player_response')
            if player_response and isinstance(player_response, compat_str):
                player_response = self._parse_json(
                    player_response, video_id, fatal=False)
                renderer = player_response['captions']['playerCaptionsTracklistRenderer']
                base_url = renderer['captionTracks'][0]['baseUrl']
                for lang in renderer['translationLanguages']:
                    lang_code = lang.get('languageCode')
                        sub_lang_list.append(lang_code)
                return make_captions(base_url, sub_lang_list)
            # Some videos don't provide ttsurl but rather caption_tracks and
            # caption_translation_languages (e.g. 20LmZk1hakA)
            # Does not used anymore as of 22.06.2017
            caption_tracks = args['caption_tracks']
            caption_translation_languages = args['caption_translation_languages']
            caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
            for lang in caption_translation_languages.split(','):
                lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang))
                sub_lang = lang_qs.get('lc', [None])[0]
                    sub_lang_list.append(sub_lang)
            return make_captions(caption_url, sub_lang_list)
        # An extractor error can be raise by the download process if there are
        # no automatic captions but there are subtitles
        except (KeyError, IndexError, ExtractorError):
            self._downloader.report_warning(err_msg)
    def _mark_watched(self, video_id, video_info, player_response):
        # Report the video as watched by requesting its playback-tracking
        # URL; best-effort, failures only warn (fatal=False below).
        playback_url = url_or_none(try_get(
            lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']) or try_get(
            video_info, lambda x: x['videostats_playback_base_url'][0]))
        if not playback_url:
        parsed_playback_url = compat_urlparse.urlparse(playback_url)
        qs = compat_urlparse.parse_qs(parsed_playback_url.query)
        # cpn generation algorithm is reverse engineered from base.js.
        # In fact it works even with dummy cpn.
        CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
        # 16 characters drawn from the 64-symbol alphabet; '& 63' keeps the
        # index in range even though randint may return up to 256.
        cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
        playback_url = compat_urlparse.urlunparse(
            parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
        self._download_webpage(
            playback_url, video_id, 'Marking watched',
            'Unable to mark watched', fatal=False)
    def _extract_urls(webpage):
        # Collect URLs/ids of all YouTube players embedded in an arbitrary
        # webpage, covering several known embedding mechanisms.
        # Embedded YouTube player
            unescapeHTML(mobj.group('url'))
            for mobj in re.finditer(r'''(?x)
                 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
                 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        # lazyYT YouTube embed
        entries.extend(list(map(
            re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))
        # Wordpress "YouTube Video Importer" plugin
        matches = re.findall(r'''(?x)<div[^>]+
            class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
            data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
        # The video id is the last captured group of each match.
        entries.extend(m[-1] for m in matches)
1594 def _extract_url(webpage):
1595 urls = YoutubeIE._extract_urls(webpage)
1596 return urls[0] if urls else None
    def extract_id(cls, url):
        # Parse the video id out of a supported YouTube URL.
        mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
            raise ExtractorError('Invalid URL: %s' % url)
        # Group 2 of _VALID_URL captures the video id — the pattern itself is
        # defined outside this view; confirm the group index against it.
        video_id = mobj.group(2)
    def _extract_chapters(description, duration):
        # Parse chapter markers (timestamp seek-links) out of the HTML video
        # description and build a list of {start_time, end_time, title}
        # dicts, discarding markers inconsistent with the video duration.
        chapter_lines = re.findall(
            r'(?:^|<br\s*/>)([^<]*<a[^>]+onclick=["\']yt\.www\.watch\.player\.seekTo[^>]+>(\d{1,2}:\d{1,2}(?::\d{1,2})?)</a>[^>]*)(?=$|<br\s*/>)',
        if not chapter_lines:
        for next_num, (chapter_line, time_point) in enumerate(
                chapter_lines, start=1):
            start_time = parse_duration(time_point)
            if start_time is None:
            # Ignore markers pointing beyond the end of the video.
            if start_time > duration:
            # A chapter ends where the next one starts (or at video end).
            end_time = (duration if next_num == len(chapter_lines)
                        else parse_duration(chapter_lines[next_num][1]))
            if end_time is None:
            if end_time > duration:
            if start_time > end_time:
            # Title is the line with the timestamp link removed and
            # surrounding dashes/whitespace stripped, whitespace collapsed.
            chapter_title = re.sub(
                r'<a[^>]+>[^<]+</a>', '', chapter_line).strip(' \t-')
            chapter_title = re.sub(r'\s+', ' ', chapter_title)
                'start_time': start_time,
                'end_time': end_time,
                'title': chapter_title,
1641 def _real_extract(self, url):
1642 url, smuggled_data = unsmuggle_url(url, {})
1645 'http' if self._downloader.params.get('prefer_insecure', False)
1650 parsed_url = compat_urllib_parse_urlparse(url)
1651 for component in [parsed_url.fragment, parsed_url.query]:
1652 query = compat_parse_qs(component)
1653 if start_time is None and 't' in query:
1654 start_time = parse_duration(query['t'][0])
1655 if start_time is None and 'start' in query:
1656 start_time = parse_duration(query['start'][0])
1657 if end_time is None and 'end' in query:
1658 end_time = parse_duration(query['end'][0])
1660 # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1661 mobj = re.search(self._NEXT_URL_RE, url)
1663 url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
1664 video_id = self.extract_id(url)
1667 url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
1668 video_webpage = self._download_webpage(url, video_id)
1670 # Attempt to extract SWF player URL
1671 mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
1672 if mobj is not None:
1673 player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1679 def add_dash_mpd(video_info):
1680 dash_mpd = video_info.get('dashmpd')
1681 if dash_mpd and dash_mpd[0] not in dash_mpds:
1682 dash_mpds.append(dash_mpd[0])
1684 def add_dash_mpd_pr(pl_response):
1685 dash_mpd = url_or_none(try_get(
1686 pl_response, lambda x: x['streamingData']['dashManifestUrl'],
1688 if dash_mpd and dash_mpd not in dash_mpds:
1689 dash_mpds.append(dash_mpd)
1694 def extract_view_count(v_info):
1695 return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
1697 def extract_token(v_info):
1698 return dict_get(v_info, ('account_playback_token', 'accountPlaybackToken', 'token'))
1700 def extract_player_response(player_response, video_id):
1701 pl_response = str_or_none(player_response)
1704 pl_response = self._parse_json(pl_response, video_id, fatal=False)
1705 if isinstance(pl_response, dict):
1706 add_dash_mpd_pr(pl_response)
1709 player_response = {}
1712 embed_webpage = None
1713 if re.search(r'player-age-gate-content">', video_webpage) is not None:
1715 # We simulate the access to the video from www.youtube.com/v/{video_id}
1716 # this can be viewed without login into Youtube
1717 url = proto + '://www.youtube.com/embed/%s' % video_id
1718 embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
1719 data = compat_urllib_parse_urlencode({
1720 'video_id': video_id,
1721 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1722 'sts': self._search_regex(
1723 r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
1725 video_info_url = proto + '://www.youtube.com/get_video_info?' + data
1726 video_info_webpage = self._download_webpage(
1727 video_info_url, video_id,
1728 note='Refetching age-gated info webpage',
1729 errnote='unable to download video info webpage')
1730 video_info = compat_parse_qs(video_info_webpage)
1731 pl_response = video_info.get('player_response', [None])[0]
1732 player_response = extract_player_response(pl_response, video_id)
1733 add_dash_mpd(video_info)
1734 view_count = extract_view_count(video_info)
1739 # Try looking directly into the video webpage
1740 ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
1742 args = ytplayer_config['args']
1743 if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
1744 # Convert to the same format returned by compat_parse_qs
1745 video_info = dict((k, [v]) for k, v in args.items())
1746 add_dash_mpd(video_info)
1747 # Rental video is not rented but preview is available (e.g.
1748 # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
1749 # https://github.com/ytdl-org/youtube-dl/issues/10532)
1750 if not video_info and args.get('ypc_vid'):
1751 return self.url_result(
1752 args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
1753 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1755 sts = ytplayer_config.get('sts')
1756 if not player_response:
1757 player_response = extract_player_response(args.get('player_response'), video_id)
1758 if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
1759 add_dash_mpd_pr(player_response)
1760 # We also try looking in get_video_info since it may contain different dashmpd
1761 # URL that points to a DASH manifest with possibly different itag set (some itags
1762 # are missing from DASH manifest pointed by webpage's dashmpd, some - from DASH
1763 # manifest pointed by get_video_info's dashmpd).
1764 # The general idea is to take a union of itags of both DASH manifests (for example
1765 # video with such 'manifest behavior' see https://github.com/ytdl-org/youtube-dl/issues/6093)
1766 self.report_video_info_webpage_download(video_id)
1767 for el in ('embedded', 'detailpage', 'vevo', ''):
1769 'video_id': video_id,
1779 video_info_webpage = self._download_webpage(
1780 '%s://www.youtube.com/get_video_info' % proto,
1781 video_id, note=False,
1782 errnote='unable to download video info webpage',
1783 fatal=False, query=query)
1784 if not video_info_webpage:
1786 get_video_info = compat_parse_qs(video_info_webpage)
1787 if not player_response:
1788 pl_response = get_video_info.get('player_response', [None])[0]
1789 player_response = extract_player_response(pl_response, video_id)
1790 add_dash_mpd(get_video_info)
1791 if view_count is None:
1792 view_count = extract_view_count(get_video_info)
1794 video_info = get_video_info
1795 get_token = extract_token(get_video_info)
1797 # Different get_video_info requests may report different results, e.g.
1798 # some may report video unavailability, but some may serve it without
1799 # any complaint (see https://github.com/ytdl-org/youtube-dl/issues/7362,
1800 # the original webpage as well as el=info and el=embedded get_video_info
1801 # requests report video unavailability due to geo restriction while
1802 # el=detailpage succeeds and returns valid data). This is probably
1803 # due to YouTube measures against IP ranges of hosting providers.
1804 # Working around by preferring the first succeeded video_info containing
1805 # the token if no such video_info yet was found.
1806 token = extract_token(video_info)
1808 video_info = get_video_info
1811 def extract_unavailable_message():
1812 return self._html_search_regex(
1813 (r'(?s)<div[^>]+id=["\']unavailable-submessage["\'][^>]+>(.+?)</div',
1814 r'(?s)<h1[^>]+id=["\']unavailable-message["\'][^>]*>(.+?)</h1>'),
1815 video_webpage, 'unavailable message', default=None)
1818 unavailable_message = extract_unavailable_message()
1819 if not unavailable_message:
1820 unavailable_message = 'Unable to extract video data'
1821 raise ExtractorError(
1822 'YouTube said: %s' % unavailable_message, expected=True, video_id=video_id)
1824 video_details = try_get(
1825 player_response, lambda x: x['videoDetails'], dict) or {}
1827 video_title = video_info.get('title', [None])[0] or video_details.get('title')
1829 self._downloader.report_warning('Unable to extract video title')
1832 description_original = video_description = get_element_by_id("eow-description", video_webpage)
1833 if video_description:
1836 redir_url = compat_urlparse.urljoin(url, m.group(1))
1837 parsed_redir_url = compat_urllib_parse_urlparse(redir_url)
1838 if re.search(r'^(?:www\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)$', parsed_redir_url.netloc) and parsed_redir_url.path == '/redirect':
1839 qs = compat_parse_qs(parsed_redir_url.query)
1845 description_original = video_description = re.sub(r'''(?x)
1847 (?:[a-zA-Z-]+="[^"]*"\s+)*?
1848 (?:title|href)="([^"]+)"\s+
1849 (?:[a-zA-Z-]+="[^"]*"\s+)*?
1853 ''', replace_url, video_description)
1854 video_description = clean_html(video_description)
1856 video_description = self._html_search_meta('description', video_webpage) or video_details.get('shortDescription')
1858 if not smuggled_data.get('force_singlefeed', False):
1859 if not self._downloader.params.get('noplaylist'):
1860 multifeed_metadata_list = try_get(
1862 lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
1863 compat_str) or try_get(
1864 video_info, lambda x: x['multifeed_metadata_list'][0], compat_str)
1865 if multifeed_metadata_list:
1868 for feed in multifeed_metadata_list.split(','):
1869 # Unquote should take place before split on comma (,) since textual
1870 # fields may contain comma as well (see
1871 # https://github.com/ytdl-org/youtube-dl/issues/8536)
1872 feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
1874 '_type': 'url_transparent',
1875 'ie_key': 'Youtube',
1877 '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
1878 {'force_singlefeed': True}),
1879 'title': '%s (%s)' % (video_title, feed_data['title'][0]),
1881 feed_ids.append(feed_data['id'][0])
1883 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
1884 % (', '.join(feed_ids), video_id))
1885 return self.playlist_result(entries, video_id, video_title, video_description)
1887 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
1889 if view_count is None:
1890 view_count = extract_view_count(video_info)
1891 if view_count is None and video_details:
1892 view_count = int_or_none(video_details.get('viewCount'))
1895 is_live = bool_or_none(video_details.get('isLive'))
1897 # Check for "rental" videos
1898 if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
1899 raise ExtractorError('"rental" videos not supported. See https://github.com/ytdl-org/youtube-dl/issues/359 for more information.', expected=True)
1901 def _extract_filesize(media_url):
1902 return int_or_none(self._search_regex(
1903 r'\bclen[=/](\d+)', media_url, 'filesize', default=None))
1905 if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1906 self.report_rtmp_download()
1908 'format_id': '_rtmp',
1910 'url': video_info['conn'][0],
1911 'player_url': player_url,
1913 elif not is_live and (len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1):
1914 encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
1915 if 'rtmpe%3Dyes' in encoded_url_map:
1916 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/ytdl-org/youtube-dl/issues/343 for more information.', expected=True)
1918 fmt_list = video_info.get('fmt_list', [''])[0]
1920 for fmt in fmt_list.split(','):
1921 spec = fmt.split('/')
1923 width_height = spec[1].split('x')
1924 if len(width_height) == 2:
1925 formats_spec[spec[0]] = {
1926 'resolution': spec[1],
1927 'width': int_or_none(width_height[0]),
1928 'height': int_or_none(width_height[1]),
1930 q = qualities(['small', 'medium', 'hd720'])
1931 streaming_formats = try_get(player_response, lambda x: x['streamingData']['formats'], list)
1932 if streaming_formats:
1933 for fmt in streaming_formats:
1934 itag = str_or_none(fmt.get('itag'))
1937 quality = fmt.get('quality')
1938 quality_label = fmt.get('qualityLabel') or quality
1939 formats_spec[itag] = {
1940 'asr': int_or_none(fmt.get('audioSampleRate')),
1941 'filesize': int_or_none(fmt.get('contentLength')),
1942 'format_note': quality_label,
1943 'fps': int_or_none(fmt.get('fps')),
1944 'height': int_or_none(fmt.get('height')),
1945 'quality': q(quality),
1946 # bitrate for itag 43 is always 2147483647
1947 'tbr': float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000) if itag != '43' else None,
1948 'width': int_or_none(fmt.get('width')),
1951 for url_data_str in encoded_url_map.split(','):
1952 url_data = compat_parse_qs(url_data_str)
1953 if 'itag' not in url_data or 'url' not in url_data or url_data.get('drm_families'):
1955 stream_type = int_or_none(try_get(url_data, lambda x: x['stream_type'][0]))
1956 # Unsupported FORMAT_STREAM_TYPE_OTF
1957 if stream_type == 3:
1959 format_id = url_data['itag'][0]
1960 url = url_data['url'][0]
1962 if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
1963 ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
1964 jsplayer_url_json = self._search_regex(
1966 embed_webpage if age_gate else video_webpage,
1967 'JS player URL (1)', default=None)
1968 if not jsplayer_url_json and not age_gate:
1969 # We need the embed website after all
1970 if embed_webpage is None:
1971 embed_url = proto + '://www.youtube.com/embed/%s' % video_id
1972 embed_webpage = self._download_webpage(
1973 embed_url, video_id, 'Downloading embed webpage')
1974 jsplayer_url_json = self._search_regex(
1975 ASSETS_RE, embed_webpage, 'JS player URL')
1977 player_url = json.loads(jsplayer_url_json)
1978 if player_url is None:
1979 player_url_json = self._search_regex(
1980 r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
1981 video_webpage, 'age gate player URL')
1982 player_url = json.loads(player_url_json)
1984 if 'sig' in url_data:
1985 url += '&signature=' + url_data['sig'][0]
1986 elif 's' in url_data:
1987 encrypted_sig = url_data['s'][0]
1989 if self._downloader.params.get('verbose'):
1990 if player_url is None:
1991 player_version = 'unknown'
1992 player_desc = 'unknown'
1994 if player_url.endswith('swf'):
1995 player_version = self._search_regex(
1996 r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
1997 'flash player', fatal=False)
1998 player_desc = 'flash player %s' % player_version
2000 player_version = self._search_regex(
2001 [r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js',
2002 r'(?:www|player(?:_ias)?)-([^/]+)(?:/[a-z]{2,3}_[A-Z]{2})?/base\.js'],
2004 'html5 player', fatal=False)
2005 player_desc = 'html5 player %s' % player_version
2007 parts_sizes = self._signature_cache_id(encrypted_sig)
2008 self.to_screen('{%s} signature length %s, %s' %
2009 (format_id, parts_sizes, player_desc))
2011 signature = self._decrypt_signature(
2012 encrypted_sig, video_id, player_url, age_gate)
2013 sp = try_get(url_data, lambda x: x['sp'][0], compat_str) or 'signature'
2014 url += '&%s=%s' % (sp, signature)
2015 if 'ratebypass' not in url:
2016 url += '&ratebypass=yes'
2019 'format_id': format_id,
2021 'player_url': player_url,
2023 if format_id in self._formats:
2024 dct.update(self._formats[format_id])
2025 if format_id in formats_spec:
2026 dct.update(formats_spec[format_id])
2028 # Some itags are not included in DASH manifest thus corresponding formats will
2029 # lack metadata (see https://github.com/ytdl-org/youtube-dl/pull/5993).
2030 # Trying to extract metadata from url_encoded_fmt_stream_map entry.
2031 mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
2032 width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
2034 filesize = int_or_none(url_data.get(
2035 'clen', [None])[0]) or _extract_filesize(url)
2037 quality = url_data.get('quality', [None])[0]
2040 'filesize': filesize,
2041 'tbr': float_or_none(url_data.get('bitrate', [None])[0], 1000),
2044 'fps': int_or_none(url_data.get('fps', [None])[0]),
2045 'format_note': url_data.get('quality_label', [None])[0] or quality,
2046 'quality': q(quality),
2048 for key, value in more_fields.items():
2051 type_ = url_data.get('type', [None])[0]
2053 type_split = type_.split(';')
2054 kind_ext = type_split[0].split('/')
2055 if len(kind_ext) == 2:
2057 dct['ext'] = mimetype2ext(type_split[0])
2058 if kind in ('audio', 'video'):
2060 for mobj in re.finditer(
2061 r'(?P<key>[a-zA-Z_-]+)=(?P<quote>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):
2062 if mobj.group('key') == 'codecs':
2063 codecs = mobj.group('val')
2066 dct.update(parse_codecs(codecs))
2067 if dct.get('acodec') == 'none' or dct.get('vcodec') == 'none':
2068 dct['downloader_options'] = {
2069 # Youtube throttles chunks >~10M
2070 'http_chunk_size': 10485760,
2075 url_or_none(try_get(
2077 lambda x: x['streamingData']['hlsManifestUrl'],
2079 or url_or_none(try_get(
2080 video_info, lambda x: x['hlsvp'][0], compat_str)))
2083 m3u8_formats = self._extract_m3u8_formats(
2084 manifest_url, video_id, 'mp4', fatal=False)
2085 for a_format in m3u8_formats:
2086 itag = self._search_regex(
2087 r'/itag/(\d+)/', a_format['url'], 'itag', default=None)
2089 a_format['format_id'] = itag
2090 if itag in self._formats:
2091 dct = self._formats[itag].copy()
2092 dct.update(a_format)
2094 a_format['player_url'] = player_url
2095 # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
2096 a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
2097 formats.append(a_format)
2099 error_message = extract_unavailable_message()
2100 if not error_message:
2101 error_message = clean_html(try_get(
2102 player_response, lambda x: x['playabilityStatus']['reason'],
2104 if not error_message:
2105 error_message = clean_html(
2106 try_get(video_info, lambda x: x['reason'][0], compat_str))
2108 raise ExtractorError(error_message, expected=True)
2109 raise ExtractorError('no conn, hlsvp, hlsManifestUrl or url_encoded_fmt_stream_map information found in video info')
2112 video_uploader = try_get(
2113 video_info, lambda x: x['author'][0],
2114 compat_str) or str_or_none(video_details.get('author'))
2116 video_uploader = compat_urllib_parse_unquote_plus(video_uploader)
2118 self._downloader.report_warning('unable to extract uploader name')
2121 video_uploader_id = None
2122 video_uploader_url = None
2124 r'<link itemprop="url" href="(?P<uploader_url>https?://www\.youtube\.com/(?:user|channel)/(?P<uploader_id>[^"]+))">',
2126 if mobj is not None:
2127 video_uploader_id = mobj.group('uploader_id')
2128 video_uploader_url = mobj.group('uploader_url')
2130 self._downloader.report_warning('unable to extract uploader nickname')
2133 str_or_none(video_details.get('channelId'))
2134 or self._html_search_meta(
2135 'channelId', video_webpage, 'channel id', default=None)
2136 or self._search_regex(
2137 r'data-channel-external-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
2138 video_webpage, 'channel id', default=None, group='id'))
2139 channel_url = 'http://www.youtube.com/channel/%s' % channel_id if channel_id else None
2142 # We try first to get a high quality image:
2143 m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
2144 video_webpage, re.DOTALL)
2145 if m_thumb is not None:
2146 video_thumbnail = m_thumb.group(1)
2147 elif 'thumbnail_url' not in video_info:
2148 self._downloader.report_warning('unable to extract video thumbnail')
2149 video_thumbnail = None
2150 else: # don't panic if we can't find it
2151 video_thumbnail = compat_urllib_parse_unquote_plus(video_info['thumbnail_url'][0])
2154 upload_date = self._html_search_meta(
2155 'datePublished', video_webpage, 'upload date', default=None)
2157 upload_date = self._search_regex(
2158 [r'(?s)id="eow-date.*?>(.*?)</span>',
2159 r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'],
2160 video_webpage, 'upload date', default=None)
2161 upload_date = unified_strdate(upload_date)
2163 video_license = self._html_search_regex(
2164 r'<h4[^>]+class="title"[^>]*>\s*License\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li',
2165 video_webpage, 'license', default=None)
2167 m_music = re.search(
2169 <h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*
2177 \bhref=["\']/red[^>]*>| # drop possible
2178 >\s*Listen ad-free with YouTube Red # YouTube Red ad
2185 video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
2186 video_creator = clean_html(m_music.group('creator'))
2188 video_alt_title = video_creator = None
def extract_meta(field):
    # Grab the first list item of the watch-page metadata section whose
    # <h4> title equals `field` (e.g. 'Song', 'Artist', 'Album').
    meta_re = (
        r'<h4[^>]+class="title"[^>]*>\s*%s\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li>\s*'
        % field)
    return self._html_search_regex(meta_re, video_webpage, field, default=None)
2195 track = extract_meta('Song')
2196 artist = extract_meta('Artist')
2197 album = extract_meta('Album')
2199 # Youtube Music Auto-generated description
2200 release_date = release_year = None
2201 if video_description:
2202 mobj = re.search(r'(?s)Provided to YouTube by [^\n]+\n+(?P<track>[^·]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?', video_description)
2205 track = mobj.group('track').strip()
2207 artist = mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·'))
# Fix: strip the *value* of the 'album' group; the original called
# 'album'.strip() (a no-op on the group name), leaving surrounding
# whitespace on the album — inconsistent with how `track` is handled.
album = mobj.group('album').strip()
2210 release_year = mobj.group('release_year')
2211 release_date = mobj.group('release_date')
2213 release_date = release_date.replace('-', '')
2214 if not release_year:
2215 release_year = int(release_date[:4])
2217 release_year = int(release_year)
2219 m_episode = re.search(
2220 r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
2223 series = unescapeHTML(m_episode.group('series'))
2224 season_number = int(m_episode.group('season'))
2225 episode_number = int(m_episode.group('episode'))
2227 series = season_number = episode_number = None
2229 m_cat_container = self._search_regex(
2230 r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
2231 video_webpage, 'categories', default=None)
2233 category = self._html_search_regex(
2234 r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
2236 video_categories = None if category is None else [category]
2238 video_categories = None
2241 unescapeHTML(m.group('content'))
2242 for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
def _extract_count(count_name):
    # Read a sentiment-bar button total ('like'/'dislike') from the watch page.
    button_re = (
        r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>'
        % re.escape(count_name))
    raw_count = self._search_regex(
        button_re, video_webpage, count_name, default=None)
    return str_to_int(raw_count)
2250 like_count = _extract_count('like')
2251 dislike_count = _extract_count('dislike')
2253 if view_count is None:
2254 view_count = str_to_int(self._search_regex(
2255 r'<[^>]+class=["\']watch-view-count[^>]+>\s*([\d,\s]+)', video_webpage,
2256 'view count', default=None))
2259 float_or_none(video_details.get('averageRating'))
2260 or try_get(video_info, lambda x: float_or_none(x['avg_rating'][0])))
2263 video_subtitles = self.extract_subtitles(video_id, video_webpage)
2264 automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
2266 video_duration = try_get(
2267 video_info, lambda x: int_or_none(x['length_seconds'][0]))
2268 if not video_duration:
2269 video_duration = int_or_none(video_details.get('lengthSeconds'))
2270 if not video_duration:
2271 video_duration = parse_duration(self._html_search_meta(
2272 'duration', video_webpage, 'video duration'))
2275 video_annotations = None
2276 if self._downloader.params.get('writeannotations', False):
2277 xsrf_token = self._search_regex(
2278 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>[A-Za-z0-9+/=]+)\2',
2279 video_webpage, 'xsrf token', group='xsrf_token', fatal=False)
2280 invideo_url = try_get(
2281 player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
2282 if xsrf_token and invideo_url:
2283 xsrf_field_name = self._search_regex(
2284 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
2285 video_webpage, 'xsrf field name',
2286 group='xsrf_field_name', default='session_token')
2287 video_annotations = self._download_webpage(
2288 self._proto_relative_url(invideo_url),
2289 video_id, note='Downloading annotations',
2290 errnote='Unable to download video annotations', fatal=False,
2291 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
2293 chapters = self._extract_chapters(description_original, video_duration)
2295 # Look for the DASH manifest
2296 if self._downloader.params.get('youtube_include_dash_manifest', True):
2297 dash_mpd_fatal = True
2298 for mpd_url in dash_mpds:
2301 def decrypt_sig(mobj):
2303 dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
2304 return '/signature/%s' % dec_s
2306 mpd_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, mpd_url)
2308 for df in self._extract_mpd_formats(
2309 mpd_url, video_id, fatal=dash_mpd_fatal,
2310 formats_dict=self._formats):
2311 if not df.get('filesize'):
2312 df['filesize'] = _extract_filesize(df['url'])
2313 # Do not overwrite DASH format found in some previous DASH manifest
2314 if df['format_id'] not in dash_formats:
2315 dash_formats[df['format_id']] = df
2316 # Additional DASH manifests may end up in HTTP Error 403 therefore
2317 # allow them to fail without bug report message if we already have
2318 # some DASH manifest succeeded. This is temporary workaround to reduce
2319 # burst of bug reports until we figure out the reason and whether it
2320 # can be fixed at all.
2321 dash_mpd_fatal = False
2322 except (ExtractorError, KeyError) as e:
2323 self.report_warning(
2324 'Skipping DASH manifest: %r' % e, video_id)
2326 # Remove the formats we found through non-DASH, they
2327 # contain less info and it can be wrong, because we use
2328 # fixed values (for example the resolution). See
2329 # https://github.com/ytdl-org/youtube-dl/issues/5774 for an
2331 formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
2332 formats.extend(dash_formats.values())
2334 # Check for malformed aspect ratio
2335 stretched_m = re.search(
2336 r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
2339 w = float(stretched_m.group('w'))
2340 h = float(stretched_m.group('h'))
2341 # yt:stretch may hold invalid ratio data (e.g. for Q39EVAstoRM ratio is 17:0).
2342 # We will only process correct ratios.
2346 if f.get('vcodec') != 'none':
2347 f['stretched_ratio'] = ratio
2350 token = extract_token(video_info)
2352 if 'reason' in video_info:
2353 if 'The uploader has not made this video available in your country.' in video_info['reason']:
2354 regions_allowed = self._html_search_meta(
2355 'regionsAllowed', video_webpage, default=None)
2356 countries = regions_allowed.split(',') if regions_allowed else None
2357 self.raise_geo_restricted(
2358 msg=video_info['reason'][0], countries=countries)
2359 reason = video_info['reason'][0]
2360 if 'Invalid parameters' in reason:
2361 unavailable_message = extract_unavailable_message()
2362 if unavailable_message:
2363 reason = unavailable_message
2364 raise ExtractorError(
2365 'YouTube said: %s' % reason,
2366 expected=True, video_id=video_id)
2368 raise ExtractorError(
2369 '"token" parameter not in video info for unknown reason',
2372 if not formats and (video_info.get('license_info') or try_get(player_response, lambda x: x['streamingData']['licenseInfos'])):
2373 raise ExtractorError('This video is DRM protected.', expected=True)
2375 self._sort_formats(formats)
2377 self.mark_watched(video_id, video_info, player_response)
2381 'uploader': video_uploader,
2382 'uploader_id': video_uploader_id,
2383 'uploader_url': video_uploader_url,
2384 'channel_id': channel_id,
2385 'channel_url': channel_url,
2386 'upload_date': upload_date,
2387 'license': video_license,
2388 'creator': video_creator or artist,
2389 'title': video_title,
2390 'alt_title': video_alt_title or track,
2391 'thumbnail': video_thumbnail,
2392 'description': video_description,
2393 'categories': video_categories,
2395 'subtitles': video_subtitles,
2396 'automatic_captions': automatic_captions,
2397 'duration': video_duration,
2398 'age_limit': 18 if age_gate else 0,
2399 'annotations': video_annotations,
2400 'chapters': chapters,
2401 'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
2402 'view_count': view_count,
2403 'like_count': like_count,
2404 'dislike_count': dislike_count,
2405 'average_rating': average_rating,
2408 'start_time': start_time,
2409 'end_time': end_time,
2411 'season_number': season_number,
2412 'episode_number': episode_number,
2416 'release_date': release_date,
2417 'release_year': release_year,
2421 class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
2422 IE_DESC = 'YouTube.com playlists'
2423 _VALID_URL = r"""(?x)(?:
2433 (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/(?:videoseries|[0-9A-Za-z_-]{11}))
2434 \? (?:.*?[&;])*? (?:p|a|list)=
2437 youtu\.be/[0-9A-Za-z_-]{11}\?.*?\blist=
2440 (?:PL|LL|EC|UU|FL|RD|UL|TL|OLAK5uy_)?[0-9A-Za-z-_]{10,}
2441 # Top tracks, they can also include dots
2447 )""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
2448 _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
2449 _VIDEO_RE_TPL = r'href="\s*/watch\?v=%s(?:&(?:[^"]*?index=(?P<index>\d+))?(?:[^>]+>(?P<title>[^<]+))?)?'
2450 _VIDEO_RE = _VIDEO_RE_TPL % r'(?P<id>[0-9A-Za-z_-]{11})'
2451 IE_NAME = 'youtube:playlist'
2453 'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
2455 'title': 'ytdl test PL',
2456 'id': 'PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
2458 'playlist_count': 3,
2460 'url': 'https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
2462 'id': 'PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
2463 'title': 'YDL_Empty_List',
2465 'playlist_count': 0,
2466 'skip': 'This playlist is private',
2468 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
2469 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2471 'title': '29C3: Not my department',
2472 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2473 'uploader': 'Christiaan008',
2474 'uploader_id': 'ChRiStIaAn008',
2476 'playlist_count': 95,
2478 'note': 'issue #673',
2479 'url': 'PLBB231211A4F62143',
2481 'title': '[OLD]Team Fortress 2 (Class-based LP)',
2482 'id': 'PLBB231211A4F62143',
2483 'uploader': 'Wickydoo',
2484 'uploader_id': 'Wickydoo',
2486 'playlist_mincount': 26,
2488 'note': 'Large playlist',
2489 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
2491 'title': 'Uploads from Cauchemar',
2492 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
2493 'uploader': 'Cauchemar',
2494 'uploader_id': 'Cauchemar89',
2496 'playlist_mincount': 799,
2498 'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
2500 'title': 'YDL_safe_search',
2501 'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
2503 'playlist_count': 2,
2504 'skip': 'This playlist is private',
2507 'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
2508 'playlist_count': 4,
2511 'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
2512 'uploader': 'milan',
2513 'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
2516 'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
2517 'playlist_mincount': 485,
2519 'title': '2018 Chinese New Singles (11/6 updated)',
2520 'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
2522 'uploader_id': 'sdragonfang',
2525 'note': 'Embedded SWF player',
2526 'url': 'https://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
2527 'playlist_count': 4,
2530 'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ',
2532 'skip': 'This playlist does not exist',
2534 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
2535 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
2537 'title': 'Uploads from Interstellar Movie',
2538 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
2539 'uploader': 'Interstellar Movie',
2540 'uploader_id': 'InterstellarMovie1',
2542 'playlist_mincount': 21,
2544 # Playlist URL that does not actually serve a playlist
2545 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
2547 'id': 'FqZTN594JQw',
2549 'title': "Smiley's People 01 detective, Adventure Series, Action",
2550 'uploader': 'STREEM',
2551 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
2552 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
2553 'upload_date': '20150526',
2554 'license': 'Standard YouTube License',
2555 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
2556 'categories': ['People & Blogs'],
2560 'dislike_count': int,
2563 'skip_download': True,
2565 'skip': 'This video is not available.',
2566 'add_ie': [YoutubeIE.ie_key()],
2568 'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
2570 'id': 'yeWKywCrFtk',
2572 'title': 'Small Scale Baler and Braiding Rugs',
2573 'uploader': 'Backus-Page House Museum',
2574 'uploader_id': 'backuspagemuseum',
2575 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
2576 'upload_date': '20161008',
2577 'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
2578 'categories': ['Nonprofits & Activism'],
2581 'dislike_count': int,
2585 'skip_download': True,
2588 # https://github.com/ytdl-org/youtube-dl/issues/21844
2589 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2591 'title': 'Data Analysis with Dr Mike Pound',
2592 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2593 'uploader_id': 'Computerphile',
2594 'uploader': 'Computerphile',
2596 'playlist_mincount': 11,
2598 'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
2599 'only_matching': True,
2601 'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
2602 'only_matching': True,
2604 # music album playlist
2605 'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
2606 'only_matching': True,
2608 'url': 'https://invidio.us/playlist?list=PLDIoUOhQQPlXr63I_vwF9GD8sAKh77dWU',
2609 'only_matching': True,
2612 def _real_initialize(self):
2615 def extract_videos_from_page(self, page):
2619 for item in re.findall(
2620 r'(<[^>]*\bdata-video-id\s*=\s*["\'][0-9A-Za-z_-]{11}[^>]+>)', page):
2621 attrs = extract_attributes(item)
2622 video_id = attrs['data-video-id']
2623 video_title = unescapeHTML(attrs.get('data-title'))
2625 video_title = video_title.strip()
2626 ids_in_page.append(video_id)
2627 titles_in_page.append(video_title)
2629 # Fallback with old _VIDEO_RE
2630 self.extract_videos_from_page_impl(
2631 self._VIDEO_RE, page, ids_in_page, titles_in_page)
2634 self.extract_videos_from_page_impl(
2635 r'href="\s*/watch\?v\s*=\s*(?P<id>[0-9A-Za-z_-]{11})', page,
2636 ids_in_page, titles_in_page)
2637 self.extract_videos_from_page_impl(
2638 r'data-video-ids\s*=\s*["\'](?P<id>[0-9A-Za-z_-]{11})', page,
2639 ids_in_page, titles_in_page)
2641 return zip(ids_in_page, titles_in_page)
2643 def _extract_mix(self, playlist_id):
2644 # The mixes are generated from a single video
2645 # the id of the playlist is just 'RD' + video_id
2647 last_id = playlist_id[-11:]
2648 for n in itertools.count(1):
2649 url = 'https://youtube.com/watch?v=%s&list=%s' % (last_id, playlist_id)
2650 webpage = self._download_webpage(
2651 url, playlist_id, 'Downloading page {0} of Youtube mix'.format(n))
2652 new_ids = orderedSet(re.findall(
2653 r'''(?xs)data-video-username=".*?".*?
2654 href="/watch\?v=([0-9A-Za-z_-]{11})&[^"]*?list=%s''' % re.escape(playlist_id),
2656 # Fetch new pages until all the videos are repeated, it seems that
2657 # there are always 51 unique videos.
2658 new_ids = [_id for _id in new_ids if _id not in ids]
2664 url_results = self._ids_to_results(ids)
2666 search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage)
2668 search_title('playlist-title')
2669 or search_title('title long-title')
2670 or search_title('title'))
2671 title = clean_html(title_span)
2673 return self.playlist_result(url_results, playlist_id, title)
2675 def _extract_playlist(self, playlist_id):
2676 url = self._TEMPLATE_URL % playlist_id
2677 page = self._download_webpage(url, playlist_id)
2679 # the yt-alert-message now has tabindex attribute (see https://github.com/ytdl-org/youtube-dl/issues/11604)
2680 for match in re.findall(r'<div class="yt-alert-message"[^>]*>([^<]+)</div>', page):
2681 match = match.strip()
2682 # Check if the playlist exists or is private
2683 mobj = re.match(r'[^<]*(?:The|This) playlist (?P<reason>does not exist|is private)[^<]*', match)
2685 reason = mobj.group('reason')
2686 message = 'This playlist %s' % reason
2687 if 'private' in reason:
2688 message += ', use --username or --netrc to access it'
2690 raise ExtractorError(message, expected=True)
2691 elif re.match(r'[^<]*Invalid parameters[^<]*', match):
2692 raise ExtractorError(
2693 'Invalid parameters. Maybe URL is incorrect.',
2695 elif re.match(r'[^<]*Choose your language[^<]*', match):
2698 self.report_warning('Youtube gives an alert message: ' + match)
2700 playlist_title = self._html_search_regex(
2701 r'(?s)<h1 class="pl-header-title[^"]*"[^>]*>\s*(.*?)\s*</h1>',
2702 page, 'title', default=None)
2704 _UPLOADER_BASE = r'class=["\']pl-header-details[^>]+>\s*<li>\s*<a[^>]+\bhref='
2705 uploader = self._search_regex(
2706 r'%s["\']/(?:user|channel)/[^>]+>([^<]+)' % _UPLOADER_BASE,
2707 page, 'uploader', default=None)
2709 r'%s(["\'])(?P<path>/(?:user|channel)/(?P<uploader_id>.+?))\1' % _UPLOADER_BASE,
2712 uploader_id = mobj.group('uploader_id')
2713 uploader_url = compat_urlparse.urljoin(url, mobj.group('path'))
2715 uploader_id = uploader_url = None
2719 if not playlist_title:
2721 # Some playlist URLs don't actually serve a playlist (e.g.
2722 # https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4)
2723 next(self._entries(page, playlist_id))
2724 except StopIteration:
2727 playlist = self.playlist_result(
2728 self._entries(page, playlist_id), playlist_id, playlist_title)
2730 'uploader': uploader,
2731 'uploader_id': uploader_id,
2732 'uploader_url': uploader_url,
2735 return has_videos, playlist
2737 def _check_download_just_video(self, url, playlist_id):
2738 # Check if it's a video-specific URL
2739 query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
2740 video_id = query_dict.get('v', [None])[0] or self._search_regex(
2741 r'(?:(?:^|//)youtu\.be/|youtube\.com/embed/(?!videoseries))([0-9A-Za-z_-]{11})', url,
2742 'video id', default=None)
2744 if self._downloader.params.get('noplaylist'):
2745 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
2746 return video_id, self.url_result(video_id, 'Youtube', video_id=video_id)
2748 self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
2749 return video_id, None
2752 def _real_extract(self, url):
2753 # Extract playlist id
2754 mobj = re.match(self._VALID_URL, url)
2756 raise ExtractorError('Invalid URL: %s' % url)
2757 playlist_id = mobj.group(1) or mobj.group(2)
2759 video_id, video = self._check_download_just_video(url, playlist_id)
2763 if playlist_id.startswith(('RD', 'UL', 'PU')):
2764 # Mixes require a custom extraction process
2765 return self._extract_mix(playlist_id)
2767 has_videos, playlist = self._extract_playlist(playlist_id)
2768 if has_videos or not video_id:
2771 # Some playlist URLs don't actually serve a playlist (see
2772 # https://github.com/ytdl-org/youtube-dl/issues/10537).
2773 # Fallback to plain video extraction if there is a video id
2774 # along with playlist id.
2775 return self.url_result(video_id, 'Youtube', video_id=video_id)
2778 class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
2779 IE_DESC = 'YouTube.com channels'
2780 _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com|(?:www\.)?invidio\.us)/channel/(?P<id>[0-9A-Za-z_-]+)'
2781 _TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos'
2782 _VIDEO_RE = r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?'
2783 IE_NAME = 'youtube:channel'
2785 'note': 'paginated channel',
2786 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
2787 'playlist_mincount': 91,
2789 'id': 'UUKfVa3S1e4PHvxWcwyMMg8w',
2790 'title': 'Uploads from lex will',
2791 'uploader': 'lex will',
2792 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2795 'note': 'Age restricted channel',
2796 # from https://www.youtube.com/user/DeusExOfficial
2797 'url': 'https://www.youtube.com/channel/UCs0ifCMCm1icqRbqhUINa0w',
2798 'playlist_mincount': 64,
2800 'id': 'UUs0ifCMCm1icqRbqhUINa0w',
2801 'title': 'Uploads from Deus Ex',
2802 'uploader': 'Deus Ex',
2803 'uploader_id': 'DeusExOfficial',
2806 'url': 'https://invidio.us/channel/UC23qupoDRn9YOAVzeoxjOQA',
2807 'only_matching': True,
def suitable(cls, url):
    # Let the more specific playlists/live extractors take precedence
    # over the generic channel extractor.
    if YoutubePlaylistsIE.suitable(url) or YoutubeLiveIE.suitable(url):
        return False
    return super(YoutubeChannelIE, cls).suitable(url)
def _build_template_url(self, url, channel_id):
    # `url` is accepted only for interface parity with subclasses
    # (YoutubeUserIE reads it); the channel listing needs just the id.
    videos_url = self._TEMPLATE_URL % channel_id
    return videos_url
2818 def _real_extract(self, url):
2819 channel_id = self._match_id(url)
2821 url = self._build_template_url(url, channel_id)
2823 # Channel by page listing is restricted to 35 pages of 30 items, i.e. 1050 videos total (see #5778)
2824 # Workaround by extracting as a playlist if managed to obtain channel playlist URL
2825 # otherwise fallback on channel by page extraction
2826 channel_page = self._download_webpage(
2827 url + '?view=57', channel_id,
2828 'Downloading channel page', fatal=False)
2829 if channel_page is False:
2830 channel_playlist_id = False
2832 channel_playlist_id = self._html_search_meta(
2833 'channelId', channel_page, 'channel id', default=None)
2834 if not channel_playlist_id:
2835 channel_url = self._html_search_meta(
2836 ('al:ios:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad'),
2837 channel_page, 'channel url', default=None)
2839 channel_playlist_id = self._search_regex(
2840 r'vnd\.youtube://user/([0-9A-Za-z_-]+)',
2841 channel_url, 'channel id', default=None)
2842 if channel_playlist_id and channel_playlist_id.startswith('UC'):
2843 playlist_id = 'UU' + channel_playlist_id[2:]
2844 return self.url_result(
2845 compat_urlparse.urljoin(url, '/playlist?list=%s' % playlist_id), 'YoutubePlaylist')
2847 channel_page = self._download_webpage(url, channel_id, 'Downloading page #1')
2848 autogenerated = re.search(r'''(?x)
2850 channel-header-autogenerated-label|
2851 yt-channel-title-autogenerated
2852 )[^"]*"''', channel_page) is not None
2855 # The videos are contained in a single page
2856 # the ajax pages can't be used, they are empty
2859 video_id, 'Youtube', video_id=video_id,
2860 video_title=video_title)
2861 for video_id, video_title in self.extract_videos_from_page(channel_page)]
2862 return self.playlist_result(entries, channel_id)
2865 next(self._entries(channel_page, channel_id))
2866 except StopIteration:
2867 alert_message = self._html_search_regex(
2868 r'(?s)<div[^>]+class=(["\']).*?\byt-alert-message\b.*?\1[^>]*>(?P<alert>[^<]+)</div>',
2869 channel_page, 'alert', default=None, group='alert')
2871 raise ExtractorError('Youtube said: %s' % alert_message, expected=True)
2873 return self.playlist_result(self._entries(channel_page, channel_id), channel_id)
class YoutubeUserIE(YoutubeChannelIE):
    """Extract all uploads from a YouTube user page (or 'ytuser:' keyword)."""
    IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:(?P<user>user|c)/)?(?!(?:attribution_link|watch|results|shared)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/%s/%s/videos'
    IE_NAME = 'youtube:user'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheLinuxFoundation',
        'playlist_mincount': 320,
        'info_dict': {
            'id': 'UUfX55Sx5hEFjoC3cNs6mCUQ',
            'title': 'Uploads from The Linux Foundation',
            'uploader': 'The Linux Foundation',
            'uploader_id': 'TheLinuxFoundation',
        }
    }, {
        # Only available via https://www.youtube.com/c/12minuteathlete/videos
        # but not https://www.youtube.com/user/12minuteathlete/videos
        'url': 'https://www.youtube.com/c/12minuteathlete/videos',
        'playlist_mincount': 249,
        'info_dict': {
            'id': 'UUVjM-zV6_opMDx7WYxnjZiQ',
            'title': 'Uploads from 12 Minute Athlete',
            'uploader': '12 Minute Athlete',
            'uploader_id': 'the12minuteathlete',
        }
    }, {
        'url': 'ytuser:phihag',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/gametrailers',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/gametrailers',
        'only_matching': True,
    }, {
        # This channel is not available, geo restricted to JP
        'url': 'https://www.youtube.com/user/kananishinoSMEJ/videos',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        # _VALID_URL is deliberately permissive, so defer to any other
        # YouTube extractor that also matches this URL; otherwise this IE
        # would shadow them. (A generator expression is enough here — the
        # original wrapped it in a redundant iter() call.)
        other_yt_ies = (
            klass for name, klass in globals().items()
            if name.startswith('Youtube') and name.endswith('IE') and klass is not cls)
        if any(ie.suitable(url) for ie in other_yt_ies):
            return False
        return super(YoutubeUserIE, cls).suitable(url)

    def _build_template_url(self, url, channel_id):
        # Preserve the original path kind ('user' or 'c'); bare URLs like
        # youtube.com/<name> fall back to the 'user' form.
        mobj = re.match(self._VALID_URL, url)
        return self._TEMPLATE_URL % (mobj.group('user') or 'user', mobj.group('id'))
class YoutubeLiveIE(YoutubeBaseInfoExtractor):
    """Resolve a channel's /live URL to its current live stream (or the channel)."""
    IE_DESC = 'YouTube.com live streams'
    _VALID_URL = r'(?P<base_url>https?://(?:\w+\.)?youtube\.com/(?:(?:user|channel|c)/)?(?P<id>[^/]+))/live'
    IE_NAME = 'youtube:live'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheYoungTurks/live',
        'info_dict': {
            'id': 'a48o2S1cPoo',
            'ext': 'mp4',
            'title': 'The Young Turks - Live Main Show',
            'uploader': 'The Young Turks',
            'uploader_id': 'TheYoungTurks',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
            'upload_date': '20150715',
            'license': 'Standard YouTube License',
            'description': 'md5:438179573adcdff3c97ebb1ee632b891',
            'categories': ['News & Politics'],
            'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/TheYoungTurks/live',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        match = re.match(self._VALID_URL, url)
        display_id = match.group('id')
        channel_url = match.group('base_url')
        # Best effort: if the /live page itself is unreachable, fall back to
        # letting the channel/user extractor deal with the base URL.
        webpage = self._download_webpage(url, display_id, fatal=False)
        if webpage:
            page_type = self._og_search_property(
                'type', webpage, 'page type', default='')
            video_id = self._html_search_meta(
                'videoId', webpage, 'video id', default=None)
            # Only hand off to the video extractor when the page really is a
            # video page carrying a well-formed 11-character video id.
            if page_type.startswith('video') and video_id and re.match(
                    r'^[0-9A-Za-z_-]{11}$', video_id):
                return self.url_result(video_id, YoutubeIE.ie_key())
        return self.url_result(channel_url)
class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
    """List all playlists of a user or channel (the /playlists tab)."""
    IE_DESC = 'YouTube.com user/channel playlists'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/(?:user|channel)/(?P<id>[^/]+)/playlists'
    IE_NAME = 'youtube:playlists'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
        'playlist_mincount': 4,
        'info_dict': {
            'id': 'ThirstForScience',
            'title': 'ThirstForScience',
        },
    }, {
        # with "Load more" button
        'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
        'playlist_mincount': 70,
        'info_dict': {
            'id': 'igorkle1',
            'title': 'Игорь Клейнер',
        },
    }, {
        'url': 'https://www.youtube.com/channel/UCiU1dHvZObB2iP6xkJ__Icw/playlists',
        'playlist_mincount': 17,
        'info_dict': {
            'id': 'UCiU1dHvZObB2iP6xkJ__Icw',
            'title': 'Chem Player',
        },
    }]
class YoutubeSearchBaseInfoExtractor(YoutubePlaylistBaseInfoExtractor):
    """Shared video-link regex for search-result pages."""
    # Matches /watch links in search-result HTML; the title group is optional
    # because not every anchor carries a title attribute.
    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})(?:[^"]*"[^>]+\btitle="(?P<title>[^"]+))?'
class YoutubeSearchIE(SearchInfoExtractor, YoutubeSearchBaseInfoExtractor):
    """Search extractor behind the 'ytsearchN:query' pseudo-URL scheme."""
    IE_DESC = 'YouTube.com searches'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    _EXTRA_QUERY_ARGS = {}

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""

        collected = []
        limit = n

        url_query = {
            'search_query': query.encode('utf-8'),
        }
        url_query.update(self._EXTRA_QUERY_ARGS)
        result_url = 'https://www.youtube.com/results?' + compat_urllib_parse_urlencode(url_query)

        for page_num in itertools.count(1):
            data = self._download_json(
                result_url, video_id='query "%s"' % query,
                note='Downloading page %s' % page_num,
                errnote='Unable to download API page',
                query={'spf': 'navigate'})
            html_content = data[1]['body']['content']

            # YouTube renders an explicit "no results" panel into the page.
            if 'class="search-message' in html_content:
                raise ExtractorError(
                    '[youtube] No video results', expected=True)

            page_videos = list(self._process_page(html_content))
            collected += page_videos
            # Stop when a page yields nothing new or we already have enough.
            if not page_videos or len(collected) > limit:
                break

            next_link = self._html_search_regex(
                r'href="(/results\?[^"]*\bsp=[^"]+)"[^>]*>\s*<span[^>]+class="[^"]*\byt-uix-button-content\b[^"]*"[^>]*>Next',
                html_content, 'next link', default=None)
            if next_link is None:
                break
            result_url = compat_urlparse.urljoin('https://www.youtube.com/', next_link)

        # Trim any overshoot from the last page before building the playlist.
        if len(collected) > n:
            collected = collected[:n]
        return self.playlist_result(collected, query)
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Same as YoutubeSearchIE but orders results by upload date."""
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    IE_DESC = 'YouTube.com searches, newest videos first'
    _SEARCH_KEY = 'ytsearchdate'
    _EXTRA_QUERY_ARGS = {'search_sort': 'video_date_uploaded'}
class YoutubeSearchURLIE(YoutubeSearchBaseInfoExtractor):
    """Extract results from a pasted YouTube search-results URL."""
    IE_DESC = 'YouTube.com search URLs'
    IE_NAME = 'youtube:search_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)'

    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # The decoded query doubles as both display id and playlist title.
        query = compat_urllib_parse_unquote_plus(
            re.match(self._VALID_URL, url).group('query'))
        webpage = self._download_webpage(url, query)
        return self.playlist_result(self._process_page(webpage), playlist_title=query)
class YoutubeShowIE(YoutubePlaylistsBaseInfoExtractor):
    """Treat a /show/<name> page as the collection of its season playlists."""
    IE_DESC = 'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/show/(?P<id>[^?#]*)'
    IE_NAME = 'youtube:show'

    _TESTS = [{
        'url': 'https://www.youtube.com/show/airdisasters',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'airdisasters',
            'title': 'Air Disasters',
        }
    }]

    def _real_extract(self, url):
        # A show is just its /playlists tab; delegate to the playlists base.
        show_id = self._match_id(url)
        return super(YoutubeShowIE, self)._real_extract(
            'https://www.youtube.com/show/%s/playlists' % show_id)
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
    """
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        return 'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        # Feeds are account-specific, so authenticate before extracting.
        self._login()

    def _entries(self, page):
        # The extraction process is the same as for playlists, but the regex
        # for the video ids doesn't contain an index
        seen_ids = []
        more_widget_html = content_html = page
        for page_num in itertools.count(1):
            found_ids = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)

            # 'recommended' feed has infinite 'load more' and each new portion spins
            # the same videos in (sometimes) slightly different order, so we'll check
            # for unicity and break when portion has no new videos
            fresh_ids = list(filter(lambda video_id: video_id not in seen_ids, orderedSet(found_ids)))
            if not fresh_ids:
                break

            seen_ids.extend(fresh_ids)

            for entry in self._ids_to_results(fresh_ids):
                yield entry

            load_more = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
            if not load_more:
                break

            more = self._download_json(
                'https://youtube.com/%s' % load_more.group('more'), self._PLAYLIST_TITLE,
                'Downloading page #%s' % page_num,
                transform_source=uppercase_escape)
            content_html = more['content_html']
            more_widget_html = more['load_more_widget_html']

    def _real_extract(self, url):
        page = self._download_webpage(
            'https://www.youtube.com/feed/%s' % self._FEED_NAME,
            self._PLAYLIST_TITLE)
        return self.playlist_result(
            self._entries(page), playlist_title=self._PLAYLIST_TITLE)
class YoutubeWatchLaterIE(YoutubePlaylistIE):
    """The user's 'Watch Later' list — the special playlist id 'WL'."""
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:feed/watch_later|(?:playlist|watch)\?(?:.+&)?list=WL)|:ytwatchlater'

    _TESTS = [{
        'url': 'https://www.youtube.com/playlist?list=WL',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?v=bCNU9TrbiRk&index=1&list=WL',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # A watch URL with list=WL may really mean "just this video".
        _, video = self._check_download_just_video(url, 'WL')
        if video:
            return video
        _, playlist = self._extract_playlist('WL')
        return playlist
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """The logged-in user's favourites — resolved to its backing playlist."""
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
    _LOGIN_REQUIRED = True

    def _real_extract(self, url):
        # The favourites feed is an ordinary playlist behind a login wall:
        # scrape its id from the page and delegate to the playlist extractor.
        page = self._download_webpage('https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        fav_playlist_id = self._search_regex(r'list=(.+?)["&]', page, 'favourites playlist id')
        return self.url_result(fav_playlist_id, 'YoutubePlaylist')
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """The account's 'recommended' feed."""
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/recommended|:ytrec(?:ommended)?'
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = 'Youtube Recommended videos'
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """The account's subscriptions feed."""
    IE_DESC = 'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = 'Youtube Subscriptions'
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """The account's watch-history feed."""
    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/history|:ythistory'
    _FEED_NAME = 'history'
    _PLAYLIST_TITLE = 'Youtube History'
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch watch URLs whose v= parameter was lost (typically an unquoted &)
    and fail with a helpful message instead of a confusing one."""
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    # NOTE(review): interior alternatives of this verbose regex were partially
    # reconstructed from the test URLs below — confirm against upstream.
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Nothing to extract — the whole point is the actionable error text.
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like  youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply  youtube-dl BaW_jenozKc .',
            expected=True)
3273 class YoutubeTruncatedIDIE(InfoExtractor):
3274 IE_NAME = 'youtube:truncated_id'
3275 IE_DESC = False # Do not list
3276 _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'
3279 'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
3280 'only_matching': True,
3283 def _real_extract(self, url):
3284 video_id = self._match_id(url)
3285 raise ExtractorError(
3286 'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url),