git.bitcoin.ninja Git - youtube-dl/blob - youtube_dl/extractor/youtube.py

   1 # coding: utf-8
   2
   3 from __future__ import unicode_literals
   4
   5
   6 import itertools
   7 import json
   8 import os.path
   9 import random
  10 import re
  11 import time
  12 import traceback
  13
  14 from .common import InfoExtractor, SearchInfoExtractor
  15 from ..jsinterp import JSInterpreter
  16 from ..swfinterp import SWFInterpreter
  17 from ..compat import (
  18     compat_chr,
  19     compat_kwargs,
  20     compat_parse_qs,
  21     compat_urllib_parse_unquote,
  22     compat_urllib_parse_unquote_plus,
  23     compat_urllib_parse_urlencode,
  24     compat_urllib_parse_urlparse,
  25     compat_urlparse,
  26     compat_str,
  27 )
  28 from ..utils import (
  29     clean_html,
  30     error_to_compat_str,
  31     ExtractorError,
  32     float_or_none,
  33     get_element_by_attribute,
  34     get_element_by_id,
  35     int_or_none,
  36     mimetype2ext,
  37     orderedSet,
  38     parse_codecs,
  39     parse_duration,
  40     qualities,
  41     remove_quotes,
  42     remove_start,
  43     smuggle_url,
  44     str_to_int,
  45     try_get,
  46     unescapeHTML,
  47     unified_strdate,
  48     unsmuggle_url,
  49     uppercase_escape,
  50     urlencode_postdata,
  51 )
  52
  53
  54 class YoutubeBaseInfoExtractor(InfoExtractor):
  55     """Provide base functions for Youtube extractors"""
  56     _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
  57     _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'
  58
  59     _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
  60     _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
  61     _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'
  62
  63     _NETRC_MACHINE = 'youtube'
  64     # If True it will raise an error if no login info is provided
  65     _LOGIN_REQUIRED = False
  66
  67     _PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL)[0-9A-Za-z-_]{10,}'
  68
  69     def _set_language(self):
  70         self._set_cookie(
  71             '.youtube.com', 'PREF', 'f1=50000000&hl=en',
  72             # YouTube sets the expire time to about two months
  73             expire_time=time.time() + 2 * 30 * 24 * 3600)
  74
  75     def _ids_to_results(self, ids):
  76         return [
  77             self.url_result(vid_id, 'Youtube', video_id=vid_id)
  78             for vid_id in ids]
  79
  80     def _login(self):
  81         """
  82         Attempt to log in to YouTube.
  83         True is returned if successful or skipped.
  84         False is returned if login failed.
  85
  86         If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
  87         """
  88         (username, password) = self._get_login_info()
  89         # No authentication to be performed
  90         if username is None:
  91             if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
  92                 raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
  93             return True
  94
  95         login_page = self._download_webpage(
  96             self._LOGIN_URL, None,
  97             note='Downloading login page',
  98             errnote='unable to fetch login page', fatal=False)
  99         if login_page is False:
 100             return
 101
 102         login_form = self._hidden_inputs(login_page)
 103
 104         def req(url, f_req, note, errnote):
 105             data = login_form.copy()
 106             data.update({
 107                 'pstMsg': 1,
 108                 'checkConnection': 'youtube',
 109                 'checkedDomains': 'youtube',
 110                 'hl': 'en',
 111                 'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
 112                 'f.req': json.dumps(f_req),
 113                 'flowName': 'GlifWebSignIn',
 114                 'flowEntry': 'ServiceLogin',
 115             })
 116             return self._download_json(
 117                 url, None, note=note, errnote=errnote,
 118                 transform_source=lambda s: re.sub(r'^[^[]*', '', s),
 119                 fatal=False,
 120                 data=urlencode_postdata(data), headers={
 121                     'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
 122                     'Google-Accounts-XSRF': 1,
 123                 })
 124
 125         def warn(message):
 126             self._downloader.report_warning(message)
 127
 128         lookup_req = [
 129             username,
 130             None, [], None, 'US', None, None, 2, False, True,
 131             [
 132                 None, None,
 133                 [2, 1, None, 1,
 134                  'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
 135                  None, [], 4],
 136                 1, [None, None, []], None, None, None, True
 137             ],
 138             username,
 139         ]
 140
 141         lookup_results = req(
 142             self._LOOKUP_URL, lookup_req,
 143             'Looking up account info', 'Unable to look up account info')
 144
 145         if lookup_results is False:
 146             return False
 147
 148         user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
 149         if not user_hash:
 150             warn('Unable to extract user hash')
 151             return False
 152
 153         challenge_req = [
 154             user_hash,
 155             None, 1, None, [1, None, None, None, [password, None, True]],
 156             [
 157                 None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
 158                 1, [None, None, []], None, None, None, True
 159             ]]
 160
 161         challenge_results = req(
 162             self._CHALLENGE_URL, challenge_req,
 163             'Logging in', 'Unable to log in')
 164
 165         if challenge_results is False:
 166             return
 167
 168         login_res = try_get(challenge_results, lambda x: x[0][5], list)
 169         if login_res:
 170             login_msg = try_get(login_res, lambda x: x[5], compat_str)
 171             warn(
 172                 'Unable to login: %s' % 'Invalid password'
 173                 if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
 174             return False
 175
 176         res = try_get(challenge_results, lambda x: x[0][-1], list)
 177         if not res:
 178             warn('Unable to extract result entry')
 179             return False
 180
 181         tfa = try_get(res, lambda x: x[0][0], list)
 182         if tfa:
 183             tfa_str = try_get(tfa, lambda x: x[2], compat_str)
 184             if tfa_str == 'TWO_STEP_VERIFICATION':
 185                 # SEND_SUCCESS - TFA code has been successfully sent to phone
 186                 # QUOTA_EXCEEDED - reached the limit of TFA codes
 187                 status = try_get(tfa, lambda x: x[5], compat_str)
 188                 if status == 'QUOTA_EXCEEDED':
 189                     warn('Exceeded the limit of TFA codes, try later')
 190                     return False
 191
 192                 tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
 193                 if not tl:
 194                     warn('Unable to extract TL')
 195                     return False
 196
 197                 tfa_code = self._get_tfa_info('2-step verification code')
 198
 199                 if not tfa_code:
 200                     warn(
 201                         'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
 202                         '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
 203                     return False
 204
 205                 tfa_code = remove_start(tfa_code, 'G-')
 206
 207                 tfa_req = [
 208                     user_hash, None, 2, None,
 209                     [
 210                         9, None, None, None, None, None, None, None,
 211                         [None, tfa_code, True, 2]
 212                     ]]
 213
 214                 tfa_results = req(
 215                     self._TFA_URL.format(tl), tfa_req,
 216                     'Submitting TFA code', 'Unable to submit TFA code')
 217
 218                 if tfa_results is False:
 219                     return False
 220
 221                 tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
 222                 if tfa_res:
 223                     tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
 224                     warn(
 225                         'Unable to finish TFA: %s' % 'Invalid TFA code'
 226                         if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
 227                     return False
 228
 229                 check_cookie_url = try_get(
 230                     tfa_results, lambda x: x[0][-1][2], compat_str)
 231         else:
 232             check_cookie_url = try_get(res, lambda x: x[2], compat_str)
 233
 234         if not check_cookie_url:
 235             warn('Unable to extract CheckCookie URL')
 236             return False
 237
 238         check_cookie_results = self._download_webpage(
 239             check_cookie_url, None, 'Checking cookie', fatal=False)
 240
 241         if check_cookie_results is False:
 242             return False
 243
 244         if 'https://myaccount.google.com/' not in check_cookie_results:
 245             warn('Unable to log in')
 246             return False
 247
 248         return True
 249
 250     def _download_webpage_handle(self, *args, **kwargs):
 251         kwargs.setdefault('query', {})['disable_polymer'] = 'true'
 252         return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
 253             *args, **compat_kwargs(kwargs))
 254
 255     def _real_initialize(self):
 256         if self._downloader is None:
 257             return
 258         self._set_language()
 259         if not self._login():
 260             return
 261
 262
 263 class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
 264     # Extract entries from page with "Load more" button
 265     def _entries(self, page, playlist_id):
 266         more_widget_html = content_html = page
 267         for page_num in itertools.count(1):
 268             for entry in self._process_page(content_html):
 269                 yield entry
 270
 271             mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
 272             if not mobj:
 273                 break
 274
 275             more = self._download_json(
 276                 'https://youtube.com/%s' % mobj.group('more'), playlist_id,
 277                 'Downloading page #%s' % page_num,
 278                 transform_source=uppercase_escape)
 279             content_html = more['content_html']
 280             if not content_html.strip():
 281                 # Some webpages show a "Load more" button but they don't
 282                 # have more videos
 283                 break
 284             more_widget_html = more['load_more_widget_html']
 285
 286
 287 class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
 288     def _process_page(self, content):
 289         for video_id, video_title in self.extract_videos_from_page(content):
 290             yield self.url_result(video_id, 'Youtube', video_id, video_title)
 291
 292     def extract_videos_from_page(self, page):
 293         ids_in_page = []
 294         titles_in_page = []
 295         for mobj in re.finditer(self._VIDEO_RE, page):
 296             # The link with index 0 is not the first video of the playlist (not sure if still actual)
 297             if 'index' in mobj.groupdict() and mobj.group('id') == '0':
 298                 continue
 299             video_id = mobj.group('id')
 300             video_title = unescapeHTML(mobj.group('title'))
 301             if video_title:
 302                 video_title = video_title.strip()
 303             try:
 304                 idx = ids_in_page.index(video_id)
 305                 if video_title and not titles_in_page[idx]:
 306                     titles_in_page[idx] = video_title
 307             except ValueError:
 308                 ids_in_page.append(video_id)
 309                 titles_in_page.append(video_title)
 310         return zip(ids_in_page, titles_in_page)
 311
 312
 313 class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
 314     def _process_page(self, content):
 315         for playlist_id in orderedSet(re.findall(
 316                 r'<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*><a[^>]+href="/?playlist\?list=([0-9A-Za-z-_]{10,})"',
 317                 content)):
 318             yield self.url_result(
 319                 'https://www.youtube.com/playlist?list=%s' % playlist_id, 'YoutubePlaylist')
 320
 321     def _real_extract(self, url):
 322         playlist_id = self._match_id(url)
 323         webpage = self._download_webpage(url, playlist_id)
 324         title = self._og_search_title(webpage, fatal=False)
 325         return self.playlist_result(self._entries(webpage, playlist_id), playlist_id, title)
 326
 327
 328 class YoutubeIE(YoutubeBaseInfoExtractor):
 329     IE_DESC = 'YouTube.com'
 330     _VALID_URL = r"""(?x)^
 331                      (
 332                          (?:https?://|//)                                    # http(s):// or protocol-independent URL
 333                          (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|
 334                             (?:www\.)?deturl\.com/www\.youtube\.com/|
 335                             (?:www\.)?pwnyoutube\.com/|
 336                             (?:www\.)?hooktube\.com/|
 337                             (?:www\.)?yourepeat\.com/|
 338                             tube\.majestyc\.net/|
 339                             youtube\.googleapis\.com/)                        # the various hostnames, with wildcard subdomains
 340                          (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
 341                          (?:                                                  # the various things that can precede the ID:
 342                              (?:(?:v|embed|e)/(?!videoseries))                # v/ or embed/ or e/
 343                              |(?:                                             # or the v= param in all its forms
 344                                  (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)?  # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
 345                                  (?:\?|\#!?)                                  # the params delimiter ? or # or #!
 346                                  (?:.*?[&;])??                                # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
 347                                  v=
 348                              )
 349                          ))
 350                          |(?:
 351                             youtu\.be|                                        # just youtu.be/xxxx
 352                             vid\.plus|                                        # or vid.plus/xxxx
 353                             zwearz\.com/watch|                                # or zwearz.com/watch/xxxx
 354                          )/
 355                          |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
 356                          )
 357                      )?                                                       # all until now is optional -> you can pass the naked ID
 358                      ([0-9A-Za-z_-]{11})                                      # here is it! the YouTube video ID
 359                      (?!.*?\blist=
 360                         (?:
 361                             %(playlist_id)s|                                  # combined list/video URLs are handled by the playlist IE
 362                             WL                                                # WL are handled by the watch later IE
 363                         )
 364                      )
 365                      (?(1).+)?                                                # if we found the ID, everything can follow
 366                      $""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
 367     _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
 368     _formats = {
 369         '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
 370         '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
 371         '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
 372         '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
 373         '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
 374         '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
 375         '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 376         '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 377         # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
 378         '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
 379         '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
 380         '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
 381         '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
 382         '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
 383         '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
 384         '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
 385         '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 386         '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 387
 388
 389         # 3D videos
 390         '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
 391         '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
 392         '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
 393         '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
 394         '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
 395         '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
 396         '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
 397
 398         # Apple HTTP Live Streaming
 399         '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
 400         '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
 401         '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
 402         '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
 403         '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
 404         '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
 405         '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
 406         '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
 407
 408         # DASH mp4 video
 409         '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
 410         '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
 411         '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
 412         '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
 413         '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
 414         '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'},  # Height can vary (https://github.com/rg3/youtube-dl/issues/4559)
 415         '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
 416         '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
 417         '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
 418         '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
 419         '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
 420         '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
 421
 422         # Dash mp4 audio
 423         '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
 424         '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
 425         '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
 426         '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
 427         '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
 428         '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
 429         '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
 430
 431         # Dash webm
 432         '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 433         '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 434         '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 435         '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 436         '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 437         '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 438         '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
 439         '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 440         '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 441         '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 442         '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 443         '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 444         '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 445         '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 446         '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 447         # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
 448         '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 449         '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
 450         '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
 451         '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
 452         '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 453         '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
 454
 455         # Dash webm audio
 456         '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
 457         '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
 458
 459         # Dash webm audio with opus inside
 460         '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
 461         '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
 462         '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
 463
 464         # RTMP (unnamed)
 465         '_rtmp': {'protocol': 'rtmp'},
 466     }
 467     _SUBTITLE_FORMATS = ('ttml', 'vtt')
 468
 469     _GEO_BYPASS = False
 470
 471     IE_NAME = 'youtube'
 472     _TESTS = [
 473         {
 474             'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
 475             'info_dict': {
 476                 'id': 'BaW_jenozKc',
 477                 'ext': 'mp4',
 478                 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
 479                 'uploader': 'Philipp Hagemeister',
 480                 'uploader_id': 'phihag',
 481                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
 482                 'upload_date': '20121002',
 483                 'license': 'Standard YouTube License',
 484                 'description': 'test chars:  "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
 485                 'categories': ['Science & Technology'],
 486                 'tags': ['youtube-dl'],
 487                 'duration': 10,
 488                 'like_count': int,
 489                 'dislike_count': int,
 490                 'start_time': 1,
 491                 'end_time': 9,
 492             }
 493         },
 494         {
 495             'url': 'https://www.youtube.com/watch?v=UxxajLWwzqY',
 496             'note': 'Test generic use_cipher_signature video (#897)',
 497             'info_dict': {
 498                 'id': 'UxxajLWwzqY',
 499                 'ext': 'mp4',
 500                 'upload_date': '20120506',
 501                 'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
 502                 'alt_title': 'I Love It (feat. Charli XCX)',
 503                 'description': 'md5:f3ceb5ef83a08d95b9d146f973157cc8',
 504                 'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
 505                          'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
 506                          'iconic ep', 'iconic', 'love', 'it'],
 507                 'duration': 180,
 508                 'uploader': 'Icona Pop',
 509                 'uploader_id': 'IconaPop',
 510                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IconaPop',
 511                 'license': 'Standard YouTube License',
 512                 'creator': 'Icona Pop',
 513             }
 514         },
 515         {
 516             'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ',
 517             'note': 'Test VEVO video with age protection (#956)',
 518             'info_dict': {
 519                 'id': '07FYdnEawAQ',
 520                 'ext': 'mp4',
 521                 'upload_date': '20130703',
 522                 'title': 'Justin Timberlake - Tunnel Vision (Explicit)',
 523                 'alt_title': 'Tunnel Vision',
 524                 'description': 'md5:64249768eec3bc4276236606ea996373',
 525                 'duration': 419,
 526                 'uploader': 'justintimberlakeVEVO',
 527                 'uploader_id': 'justintimberlakeVEVO',
 528                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/justintimberlakeVEVO',
 529                 'license': 'Standard YouTube License',
 530                 'creator': 'Justin Timberlake',
 531                 'age_limit': 18,
 532             }
 533         },
 534         {
 535             'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
 536             'note': 'Embed-only video (#1746)',
 537             'info_dict': {
 538                 'id': 'yZIXLfi8CZQ',
 539                 'ext': 'mp4',
 540                 'upload_date': '20120608',
 541                 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
 542                 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
 543                 'uploader': 'SET India',
 544                 'uploader_id': 'setindia',
 545                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
 546                 'license': 'Standard YouTube License',
 547                 'age_limit': 18,
 548             }
 549         },
 550         {
 551             'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=UxxajLWwzqY',
 552             'note': 'Use the first video ID in the URL',
 553             'info_dict': {
 554                 'id': 'BaW_jenozKc',
 555                 'ext': 'mp4',
 556                 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
 557                 'uploader': 'Philipp Hagemeister',
 558                 'uploader_id': 'phihag',
 559                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
 560                 'upload_date': '20121002',
 561                 'license': 'Standard YouTube License',
 562                 'description': 'test chars:  "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
 563                 'categories': ['Science & Technology'],
 564                 'tags': ['youtube-dl'],
 565                 'duration': 10,
 566                 'like_count': int,
 567                 'dislike_count': int,
 568             },
 569             'params': {
 570                 'skip_download': True,
 571             },
 572         },
 573         {
 574             'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
 575             'note': '256k DASH audio (format 141) via DASH manifest',
 576             'info_dict': {
 577                 'id': 'a9LDPn-MO4I',
 578                 'ext': 'm4a',
 579                 'upload_date': '20121002',
 580                 'uploader_id': '8KVIDEO',
 581                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
 582                 'description': '',
 583                 'uploader': '8KVIDEO',
 584                 'license': 'Standard YouTube License',
 585                 'title': 'UHDTV TEST 8K VIDEO.mp4'
 586             },
 587             'params': {
 588                 'youtube_include_dash_manifest': True,
 589                 'format': '141',
 590             },
 591             'skip': 'format 141 not served anymore',
 592         },
 593         # DASH manifest with encrypted signature
 594         {
 595             'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
 596             'info_dict': {
 597                 'id': 'IB3lcPjvWLA',
 598                 'ext': 'm4a',
 599                 'title': 'Afrojack, Spree Wilson - The Spark ft. Spree Wilson',
 600                 'description': 'md5:12e7067fa6735a77bdcbb58cb1187d2d',
 601                 'duration': 244,
 602                 'uploader': 'AfrojackVEVO',
 603                 'uploader_id': 'AfrojackVEVO',
 604                 'upload_date': '20131011',
 605                 'license': 'Standard YouTube License',
 606             },
 607             'params': {
 608                 'youtube_include_dash_manifest': True,
 609                 'format': '141/bestaudio[ext=m4a]',
 610             },
 611         },
 612         # JS player signature function name containing $
 613         {
 614             'url': 'https://www.youtube.com/watch?v=nfWlot6h_JM',
 615             'info_dict': {
 616                 'id': 'nfWlot6h_JM',
 617                 'ext': 'm4a',
 618                 'title': 'Taylor Swift - Shake It Off',
 619                 'alt_title': 'Shake It Off',
 620                 'description': 'md5:95f66187cd7c8b2c13eb78e1223b63c3',
 621                 'duration': 242,
 622                 'uploader': 'TaylorSwiftVEVO',
 623                 'uploader_id': 'TaylorSwiftVEVO',
 624                 'upload_date': '20140818',
 625                 'license': 'Standard YouTube License',
 626                 'creator': 'Taylor Swift',
 627             },
 628             'params': {
 629                 'youtube_include_dash_manifest': True,
 630                 'format': '141/bestaudio[ext=m4a]',
 631             },
 632         },
 633         # Controversy video
 634         {
 635             'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
 636             'info_dict': {
 637                 'id': 'T4XJQO3qol8',
 638                 'ext': 'mp4',
 639                 'duration': 219,
 640                 'upload_date': '20100909',
 641                 'uploader': 'The Amazing Atheist',
 642                 'uploader_id': 'TheAmazingAtheist',
 643                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
 644                 'license': 'Standard YouTube License',
 645                 'title': 'Burning Everyone\'s Koran',
 646                 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
 647             }
 648         },
 649         # Normal age-gate video (No vevo, embed allowed)
 650         {
 651             'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
 652             'info_dict': {
 653                 'id': 'HtVdAasjOgU',
 654                 'ext': 'mp4',
 655                 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
 656                 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
 657                 'duration': 142,
 658                 'uploader': 'The Witcher',
 659                 'uploader_id': 'WitcherGame',
 660                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
 661                 'upload_date': '20140605',
 662                 'license': 'Standard YouTube License',
 663                 'age_limit': 18,
 664             },
 665         },
 666         # Age-gate video with encrypted signature
 667         {
 668             'url': 'https://www.youtube.com/watch?v=6kLq3WMV1nU',
 669             'info_dict': {
 670                 'id': '6kLq3WMV1nU',
 671                 'ext': 'mp4',
 672                 'title': 'Dedication To My Ex (Miss That) (Lyric Video)',
 673                 'description': 'md5:33765bb339e1b47e7e72b5490139bb41',
 674                 'duration': 247,
 675                 'uploader': 'LloydVEVO',
 676                 'uploader_id': 'LloydVEVO',
 677                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/LloydVEVO',
 678                 'upload_date': '20110629',
 679                 'license': 'Standard YouTube License',
 680                 'age_limit': 18,
 681             },
 682         },
 683         # video_info is None (https://github.com/rg3/youtube-dl/issues/4421)
 684         # YouTube Red ad is not captured for creator
 685         {
 686             'url': '__2ABJjxzNo',
 687             'info_dict': {
 688                 'id': '__2ABJjxzNo',
 689                 'ext': 'mp4',
 690                 'duration': 266,
 691                 'upload_date': '20100430',
 692                 'uploader_id': 'deadmau5',
 693                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
 694                 'creator': 'deadmau5',
 695                 'description': 'md5:12c56784b8032162bb936a5f76d55360',
 696                 'uploader': 'deadmau5',
 697                 'license': 'Standard YouTube License',
 698                 'title': 'Deadmau5 - Some Chords (HD)',
 699                 'alt_title': 'Some Chords',
 700             },
 701             'expected_warnings': [
 702                 'DASH manifest missing',
 703             ]
 704         },
 705         # Olympics (https://github.com/rg3/youtube-dl/issues/4431)
 706         {
 707             'url': 'lqQg6PlCWgI',
 708             'info_dict': {
 709                 'id': 'lqQg6PlCWgI',
 710                 'ext': 'mp4',
 711                 'duration': 6085,
 712                 'upload_date': '20150827',
 713                 'uploader_id': 'olympic',
 714                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
 715                 'license': 'Standard YouTube License',
 716                 'description': 'HO09  - Women -  GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
 717                 'uploader': 'Olympic',
 718                 'title': 'Hockey - Women -  GER-AUS - London 2012 Olympic Games',
 719             },
 720             'params': {
 721                 'skip_download': 'requires avconv',
 722             }
 723         },
 724         # Non-square pixels
 725         {
 726             'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
 727             'info_dict': {
 728                 'id': '_b-2C3KPAM0',
 729                 'ext': 'mp4',
 730                 'stretched_ratio': 16 / 9.,
 731                 'duration': 85,
 732                 'upload_date': '20110310',
 733                 'uploader_id': 'AllenMeow',
 734                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
 735                 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
 736                 'uploader': '孫艾倫',
 737                 'license': 'Standard YouTube License',
 738                 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
 739             },
 740         },
 741         # url_encoded_fmt_stream_map is empty string
 742         {
 743             'url': 'qEJwOuvDf7I',
 744             'info_dict': {
 745                 'id': 'qEJwOuvDf7I',
 746                 'ext': 'webm',
 747                 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
 748                 'description': '',
 749                 'upload_date': '20150404',
 750                 'uploader_id': 'spbelect',
 751                 'uploader': 'Наблюдатели Петербурга',
 752             },
 753             'params': {
 754                 'skip_download': 'requires avconv',
 755             },
 756             'skip': 'This live event has ended.',
 757         },
 758         # Extraction from multiple DASH manifests (https://github.com/rg3/youtube-dl/pull/6097)
 759         {
 760             'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
 761             'info_dict': {
 762                 'id': 'FIl7x6_3R5Y',
 763                 'ext': 'mp4',
 764                 'title': 'md5:7b81415841e02ecd4313668cde88737a',
 765                 'description': 'md5:116377fd2963b81ec4ce64b542173306',
 766                 'duration': 220,
 767                 'upload_date': '20150625',
 768                 'uploader_id': 'dorappi2000',
 769                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
 770                 'uploader': 'dorappi2000',
 771                 'license': 'Standard YouTube License',
 772                 'formats': 'mincount:32',
 773             },
 774         },
 775         # DASH manifest with segment_list
 776         {
 777             'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
 778             'md5': '8ce563a1d667b599d21064e982ab9e31',
 779             'info_dict': {
 780                 'id': 'CsmdDsKjzN8',
 781                 'ext': 'mp4',
 782                 'upload_date': '20150501',  # According to '<meta itemprop="datePublished"', but in other places it's 20150510
 783                 'uploader': 'Airtek',
 784                 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
 785                 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
 786                 'license': 'Standard YouTube License',
 787                 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
 788             },
 789             'params': {
 790                 'youtube_include_dash_manifest': True,
 791                 'format': '135',  # bestvideo
 792             },
 793             'skip': 'This live event has ended.',
 794         },
 795         {
 796             # Multifeed videos (multiple cameras), URL is for Main Camera
 797             'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
 798             'info_dict': {
 799                 'id': 'jqWvoWXjCVs',
 800                 'title': 'teamPGP: Rocket League Noob Stream',
 801                 'description': 'md5:dc7872fb300e143831327f1bae3af010',
 802             },
 803             'playlist': [{
 804                 'info_dict': {
 805                     'id': 'jqWvoWXjCVs',
 806                     'ext': 'mp4',
 807                     'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
 808                     'description': 'md5:dc7872fb300e143831327f1bae3af010',
 809                     'duration': 7335,
 810                     'upload_date': '20150721',
 811                     'uploader': 'Beer Games Beer',
 812                     'uploader_id': 'beergamesbeer',
 813                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
 814                     'license': 'Standard YouTube License',
 815                 },
 816             }, {
 817                 'info_dict': {
 818                     'id': '6h8e8xoXJzg',
 819                     'ext': 'mp4',
 820                     'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
 821                     'description': 'md5:dc7872fb300e143831327f1bae3af010',
 822                     'duration': 7337,
 823                     'upload_date': '20150721',
 824                     'uploader': 'Beer Games Beer',
 825                     'uploader_id': 'beergamesbeer',
 826                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
 827                     'license': 'Standard YouTube License',
 828                 },
 829             }, {
 830                 'info_dict': {
 831                     'id': 'PUOgX5z9xZw',
 832                     'ext': 'mp4',
 833                     'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
 834                     'description': 'md5:dc7872fb300e143831327f1bae3af010',
 835                     'duration': 7337,
 836                     'upload_date': '20150721',
 837                     'uploader': 'Beer Games Beer',
 838                     'uploader_id': 'beergamesbeer',
 839                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
 840                     'license': 'Standard YouTube License',
 841                 },
 842             }, {
 843                 'info_dict': {
 844                     'id': 'teuwxikvS5k',
 845                     'ext': 'mp4',
 846                     'title': 'teamPGP: Rocket League Noob Stream (zim)',
 847                     'description': 'md5:dc7872fb300e143831327f1bae3af010',
 848                     'duration': 7334,
 849                     'upload_date': '20150721',
 850                     'uploader': 'Beer Games Beer',
 851                     'uploader_id': 'beergamesbeer',
 852                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
 853                     'license': 'Standard YouTube License',
 854                 },
 855             }],
 856             'params': {
 857                 'skip_download': True,
 858             },
 859         },
 860         {
 861             # Multifeed video with comma in title (see https://github.com/rg3/youtube-dl/issues/8536)
 862             'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
 863             'info_dict': {
 864                 'id': 'gVfLd0zydlo',
 865                 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
 866             },
 867             'playlist_count': 2,
 868             'skip': 'Not multifeed anymore',
 869         },
 870         {
 871             'url': 'https://vid.plus/FlRa-iH7PGw',
 872             'only_matching': True,
 873         },
 874         {
 875             'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
 876             'only_matching': True,
 877         },
 878         {
 879             # Title with JS-like syntax "};" (see https://github.com/rg3/youtube-dl/issues/7468)
 880             # Also tests cut-off URL expansion in video description (see
 881             # https://github.com/rg3/youtube-dl/issues/1892,
 882             # https://github.com/rg3/youtube-dl/issues/8164)
 883             'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
 884             'info_dict': {
 885                 'id': 'lsguqyKfVQg',
 886                 'ext': 'mp4',
 887                 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
 888                 'alt_title': 'Dark Walk',
 889                 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
 890                 'duration': 133,
 891                 'upload_date': '20151119',
 892                 'uploader_id': 'IronSoulElf',
 893                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
 894                 'uploader': 'IronSoulElf',
 895                 'license': 'Standard YouTube License',
 896                 'creator': 'Todd Haberman, Daniel Law Heath & Aaron Kaplan',
 897             },
 898             'params': {
 899                 'skip_download': True,
 900             },
 901         },
 902         {
 903             # Tags with '};' (see https://github.com/rg3/youtube-dl/issues/7468)
 904             'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
 905             'only_matching': True,
 906         },
 907         {
 908             # Video with yt:stretch=17:0
 909             'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
 910             'info_dict': {
 911                 'id': 'Q39EVAstoRM',
 912                 'ext': 'mp4',
 913                 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
 914                 'description': 'md5:ee18a25c350637c8faff806845bddee9',
 915                 'upload_date': '20151107',
 916                 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
 917                 'uploader': 'CH GAMER DROID',
 918             },
 919             'params': {
 920                 'skip_download': True,
 921             },
 922             'skip': 'This video does not exist.',
 923         },
 924         {
 925             # Video licensed under Creative Commons
 926             'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
 927             'info_dict': {
 928                 'id': 'M4gD1WSo5mA',
 929                 'ext': 'mp4',
 930                 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
 931                 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
 932                 'duration': 721,
 933                 'upload_date': '20150127',
 934                 'uploader_id': 'BerkmanCenter',
 935                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
 936                 'uploader': 'The Berkman Klein Center for Internet & Society',
 937                 'license': 'Creative Commons Attribution license (reuse allowed)',
 938             },
 939             'params': {
 940                 'skip_download': True,
 941             },
 942         },
 943         {
 944             # Channel-like uploader_url
 945             'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
 946             'info_dict': {
 947                 'id': 'eQcmzGIKrzg',
 948                 'ext': 'mp4',
 949                 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
 950                 'description': 'md5:dda0d780d5a6e120758d1711d062a867',
 951                 'duration': 4060,
 952                 'upload_date': '20151119',
 953                 'uploader': 'Bernie 2016',
 954                 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
 955                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
 956                 'license': 'Creative Commons Attribution license (reuse allowed)',
 957             },
 958             'params': {
 959                 'skip_download': True,
 960             },
 961         },
 962         {
 963             'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
 964             'only_matching': True,
 965         },
 966         {
 967             # YouTube Red paid video (https://github.com/rg3/youtube-dl/issues/10059)
 968             'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
 969             'only_matching': True,
 970         },
 971         {
 972             # Rental video preview
 973             'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
 974             'info_dict': {
 975                 'id': 'uGpuVWrhIzE',
 976                 'ext': 'mp4',
 977                 'title': 'Piku - Trailer',
 978                 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
 979                 'upload_date': '20150811',
 980                 'uploader': 'FlixMatrix',
 981                 'uploader_id': 'FlixMatrixKaravan',
 982                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
 983                 'license': 'Standard YouTube License',
 984             },
 985             'params': {
 986                 'skip_download': True,
 987             },
 988         },
 989         {
 990             # YouTube Red video with episode data
 991             'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
 992             'info_dict': {
 993                 'id': 'iqKdEhx-dD4',
 994                 'ext': 'mp4',
 995                 'title': 'Isolation - Mind Field (Ep 1)',
 996                 'description': 'md5:8013b7ddea787342608f63a13ddc9492',
 997                 'duration': 2085,
 998                 'upload_date': '20170118',
 999                 'uploader': 'Vsauce',
1000                 'uploader_id': 'Vsauce',
1001                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
1002                 'license': 'Standard YouTube License',
1003                 'series': 'Mind Field',
1004                 'season_number': 1,
1005                 'episode_number': 1,
1006             },
1007             'params': {
1008                 'skip_download': True,
1009             },
1010             'expected_warnings': [
1011                 'Skipping DASH manifest',
1012             ],
1013         },
1014         {
1015             # The following content has been identified by the YouTube community
1016             # as inappropriate or offensive to some audiences.
1017             'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1018             'info_dict': {
1019                 'id': '6SJNVb0GnPI',
1020                 'ext': 'mp4',
1021                 'title': 'Race Differences in Intelligence',
1022                 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1023                 'duration': 965,
1024                 'upload_date': '20140124',
1025                 'uploader': 'New Century Foundation',
1026                 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1027                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1028                 'license': 'Standard YouTube License',
1029                 'view_count': int,
1030             },
1031             'params': {
1032                 'skip_download': True,
1033             },
1034         },
1035         {
1036             # itag 212
1037             'url': '1t24XAntNCY',
1038             'only_matching': True,
1039         },
1040         {
1041             # geo restricted to JP
1042             'url': 'sJL6WA-aGkQ',
1043             'only_matching': True,
1044         },
1045         {
1046             'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
1047             'only_matching': True,
1048         },
1049     ]
1050
1051     def __init__(self, *args, **kwargs):
1052         super(YoutubeIE, self).__init__(*args, **kwargs)
1053         self._player_cache = {}
1054
1055     def report_video_info_webpage_download(self, video_id):
1056         """Report attempt to download video info webpage."""
1057         self.to_screen('%s: Downloading video info webpage' % video_id)
1058
1059     def report_information_extraction(self, video_id):
1060         """Report attempt to extract video information."""
1061         self.to_screen('%s: Extracting video information' % video_id)
1062
1063     def report_unavailable_format(self, video_id, format):
1064         """Report extracted video URL."""
1065         self.to_screen('%s: Format %s not available' % (video_id, format))
1066
1067     def report_rtmp_download(self):
1068         """Indicate the download will use the RTMP protocol."""
1069         self.to_screen('RTMP download detected')
1070
1071     def _signature_cache_id(self, example_sig):
1072         """ Return a string representation of a signature """
1073         return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
1074
1075     def _extract_signature_function(self, video_id, player_url, example_sig):
1076         id_m = re.match(
1077             r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|(?:/[a-z]{2}_[A-Z]{2})?/base)?\.(?P<ext>[a-z]+)$',
1078             player_url)
1079         if not id_m:
1080             raise ExtractorError('Cannot identify player %r' % player_url)
1081         player_type = id_m.group('ext')
1082         player_id = id_m.group('id')
1083
1084         # Read from filesystem cache
1085         func_id = '%s_%s_%s' % (
1086             player_type, player_id, self._signature_cache_id(example_sig))
1087         assert os.path.basename(func_id) == func_id
1088
1089         cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
1090         if cache_spec is not None:
1091             return lambda s: ''.join(s[i] for i in cache_spec)
1092
1093         download_note = (
1094             'Downloading player %s' % player_url
1095             if self._downloader.params.get('verbose') else
1096             'Downloading %s player %s' % (player_type, player_id)
1097         )
1098         if player_type == 'js':
1099             code = self._download_webpage(
1100                 player_url, video_id,
1101                 note=download_note,
1102                 errnote='Download of %s failed' % player_url)
1103             res = self._parse_sig_js(code)
1104         elif player_type == 'swf':
1105             urlh = self._request_webpage(
1106                 player_url, video_id,
1107                 note=download_note,
1108                 errnote='Download of %s failed' % player_url)
1109             code = urlh.read()
1110             res = self._parse_sig_swf(code)
1111         else:
1112             assert False, 'Invalid player type %r' % player_type
1113
1114         test_string = ''.join(map(compat_chr, range(len(example_sig))))
1115         cache_res = res(test_string)
1116         cache_spec = [ord(c) for c in cache_res]
1117
1118         self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
1119         return res
1120
1121     def _print_sig_code(self, func, example_sig):
1122         def gen_sig_code(idxs):
1123             def _genslice(start, end, step):
1124                 starts = '' if start == 0 else str(start)
1125                 ends = (':%d' % (end + step)) if end + step >= 0 else ':'
1126                 steps = '' if step == 1 else (':%d' % step)
1127                 return 's[%s%s%s]' % (starts, ends, steps)
1128
1129             step = None
1130             # Quelch pyflakes warnings - start will be set when step is set
1131             start = '(Never used)'
1132             for i, prev in zip(idxs[1:], idxs[:-1]):
1133                 if step is not None:
1134                     if i - prev == step:
1135                         continue
1136                     yield _genslice(start, prev, step)
1137                     step = None
1138                     continue
1139                 if i - prev in [-1, 1]:
1140                     step = i - prev
1141                     start = prev
1142                     continue
1143                 else:
1144                     yield 's[%d]' % prev
1145             if step is None:
1146                 yield 's[%d]' % i
1147             else:
1148                 yield _genslice(start, i, step)
1149
1150         test_string = ''.join(map(compat_chr, range(len(example_sig))))
1151         cache_res = func(test_string)
1152         cache_spec = [ord(c) for c in cache_res]
1153         expr_code = ' + '.join(gen_sig_code(cache_spec))
1154         signature_id_tuple = '(%s)' % (
1155             ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
1156         code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
1157                 '    return %s\n') % (signature_id_tuple, expr_code)
1158         self.to_screen('Extracted signature function:\n' + code)
1159
1160     def _parse_sig_js(self, jscode):
1161         funcname = self._search_regex(
1162             (r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1163              r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\('),
1164             jscode, 'Initial JS player signature function name', group='sig')
1165
1166         jsi = JSInterpreter(jscode)
1167         initial_function = jsi.extract_function(funcname)
1168         return lambda s: initial_function([s])
1169
1170     def _parse_sig_swf(self, file_contents):
1171         swfi = SWFInterpreter(file_contents)
1172         TARGET_CLASSNAME = 'SignatureDecipher'
1173         searched_class = swfi.extract_class(TARGET_CLASSNAME)
1174         initial_function = swfi.extract_function(searched_class, 'decipher')
1175         return lambda s: initial_function([s])
1176
1177     def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
1178         """Turn the encrypted s field into a working signature"""
1179
1180         if player_url is None:
1181             raise ExtractorError('Cannot decrypt signature without player_url')
1182
1183         if player_url.startswith('//'):
1184             player_url = 'https:' + player_url
1185         elif not re.match(r'https?://', player_url):
1186             player_url = compat_urlparse.urljoin(
1187                 'https://www.youtube.com', player_url)
1188         try:
1189             player_id = (player_url, self._signature_cache_id(s))
1190             if player_id not in self._player_cache:
1191                 func = self._extract_signature_function(
1192                     video_id, player_url, s
1193                 )
1194                 self._player_cache[player_id] = func
1195             func = self._player_cache[player_id]
1196             if self._downloader.params.get('youtube_print_sig_code'):
1197                 self._print_sig_code(func, s)
1198             return func(s)
1199         except Exception as e:
1200             tb = traceback.format_exc()
1201             raise ExtractorError(
1202                 'Signature extraction failed: ' + tb, cause=e)
1203
1204     def _get_subtitles(self, video_id, webpage):
1205         try:
1206             subs_doc = self._download_xml(
1207                 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
1208                 video_id, note=False)
1209         except ExtractorError as err:
1210             self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err))
1211             return {}
1212
1213         sub_lang_list = {}
1214         for track in subs_doc.findall('track'):
1215             lang = track.attrib['lang_code']
1216             if lang in sub_lang_list:
1217                 continue
1218             sub_formats = []
1219             for ext in self._SUBTITLE_FORMATS:
1220                 params = compat_urllib_parse_urlencode({
1221                     'lang': lang,
1222                     'v': video_id,
1223                     'fmt': ext,
1224                     'name': track.attrib['name'].encode('utf-8'),
1225                 })
1226                 sub_formats.append({
1227                     'url': 'https://www.youtube.com/api/timedtext?' + params,
1228                     'ext': ext,
1229                 })
1230             sub_lang_list[lang] = sub_formats
1231         if not sub_lang_list:
1232             self._downloader.report_warning('video doesn\'t have subtitles')
1233             return {}
1234         return sub_lang_list
1235
1236     def _get_ytplayer_config(self, video_id, webpage):
1237         patterns = (
1238             # User data may contain arbitrary character sequences that may affect
1239             # JSON extraction with regex, e.g. when '};' is contained the second
1240             # regex won't capture the whole JSON. Yet working around by trying more
1241             # concrete regex first keeping in mind proper quoted string handling
1242             # to be implemented in future that will replace this workaround (see
1243             # https://github.com/rg3/youtube-dl/issues/7468,
1244             # https://github.com/rg3/youtube-dl/pull/7599)
1245             r';ytplayer\.config\s*=\s*({.+?});ytplayer',
1246             r';ytplayer\.config\s*=\s*({.+?});',
1247         )
1248         config = self._search_regex(
1249             patterns, webpage, 'ytplayer.config', default=None)
1250         if config:
1251             return self._parse_json(
1252                 uppercase_escape(config), video_id, fatal=False)
1253
1254     def _get_automatic_captions(self, video_id, webpage):
1255         """We need the webpage for getting the captions url, pass it as an
1256            argument to speed up the process."""
1257         self.to_screen('%s: Looking for automatic captions' % video_id)
1258         player_config = self._get_ytplayer_config(video_id, webpage)
1259         err_msg = 'Couldn\'t find automatic captions for %s' % video_id
1260         if not player_config:
1261             self._downloader.report_warning(err_msg)
1262             return {}
1263         try:
1264             args = player_config['args']
1265             caption_url = args.get('ttsurl')
1266             if caption_url:
1267                 timestamp = args['timestamp']
1268                 # We get the available subtitles
1269                 list_params = compat_urllib_parse_urlencode({
1270                     'type': 'list',
1271                     'tlangs': 1,
1272                     'asrs': 1,
1273                 })
1274                 list_url = caption_url + '&' + list_params
1275                 caption_list = self._download_xml(list_url, video_id)
1276                 original_lang_node = caption_list.find('track')
1277                 if original_lang_node is None:
1278                     self._downloader.report_warning('Video doesn\'t have automatic captions')
1279                     return {}
1280                 original_lang = original_lang_node.attrib['lang_code']
1281                 caption_kind = original_lang_node.attrib.get('kind', '')
1282
1283                 sub_lang_list = {}
1284                 for lang_node in caption_list.findall('target'):
1285                     sub_lang = lang_node.attrib['lang_code']
1286                     sub_formats = []
1287                     for ext in self._SUBTITLE_FORMATS:
1288                         params = compat_urllib_parse_urlencode({
1289                             'lang': original_lang,
1290                             'tlang': sub_lang,
1291                             'fmt': ext,
1292                             'ts': timestamp,
1293                             'kind': caption_kind,
1294                         })
1295                         sub_formats.append({
1296                             'url': caption_url + '&' + params,
1297                             'ext': ext,
1298                         })
1299                     sub_lang_list[sub_lang] = sub_formats
1300                 return sub_lang_list
1301
1302             def make_captions(sub_url, sub_langs):
1303                 parsed_sub_url = compat_urllib_parse_urlparse(sub_url)
1304                 caption_qs = compat_parse_qs(parsed_sub_url.query)
1305                 captions = {}
1306                 for sub_lang in sub_langs:
1307                     sub_formats = []
1308                     for ext in self._SUBTITLE_FORMATS:
1309                         caption_qs.update({
1310                             'tlang': [sub_lang],
1311                             'fmt': [ext],
1312                         })
1313                         sub_url = compat_urlparse.urlunparse(parsed_sub_url._replace(
1314                             query=compat_urllib_parse_urlencode(caption_qs, True)))
1315                         sub_formats.append({
1316                             'url': sub_url,
1317                             'ext': ext,
1318                         })
1319                     captions[sub_lang] = sub_formats
1320                 return captions
1321
1322             # New captions format as of 22.06.2017
1323             player_response = args.get('player_response')
1324             if player_response and isinstance(player_response, compat_str):
1325                 player_response = self._parse_json(
1326                     player_response, video_id, fatal=False)
1327                 if player_response:
1328                     renderer = player_response['captions']['playerCaptionsTracklistRenderer']
1329                     base_url = renderer['captionTracks'][0]['baseUrl']
1330                     sub_lang_list = []
1331                     for lang in renderer['translationLanguages']:
1332                         lang_code = lang.get('languageCode')
1333                         if lang_code:
1334                             sub_lang_list.append(lang_code)
1335                     return make_captions(base_url, sub_lang_list)
1336
1337             # Some videos don't provide ttsurl but rather caption_tracks and
1338             # caption_translation_languages (e.g. 20LmZk1hakA)
1339             # Does not used anymore as of 22.06.2017
1340             caption_tracks = args['caption_tracks']
1341             caption_translation_languages = args['caption_translation_languages']
1342             caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
1343             sub_lang_list = []
1344             for lang in caption_translation_languages.split(','):
1345                 lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang))
1346                 sub_lang = lang_qs.get('lc', [None])[0]
1347                 if sub_lang:
1348                     sub_lang_list.append(sub_lang)
1349             return make_captions(caption_url, sub_lang_list)
1350         # An extractor error can be raise by the download process if there are
1351         # no automatic captions but there are subtitles
1352         except (KeyError, IndexError, ExtractorError):
1353             self._downloader.report_warning(err_msg)
1354             return {}
1355
1356     def _mark_watched(self, video_id, video_info):
1357         playback_url = video_info.get('videostats_playback_base_url', [None])[0]
1358         if not playback_url:
1359             return
1360         parsed_playback_url = compat_urlparse.urlparse(playback_url)
1361         qs = compat_urlparse.parse_qs(parsed_playback_url.query)
1362
1363         # cpn generation algorithm is reverse engineered from base.js.
1364         # In fact it works even with dummy cpn.
1365         CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
1366         cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
1367
1368         qs.update({
1369             'ver': ['2'],
1370             'cpn': [cpn],
1371         })
1372         playback_url = compat_urlparse.urlunparse(
1373             parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
1374
1375         self._download_webpage(
1376             playback_url, video_id, 'Marking watched',
1377             'Unable to mark watched', fatal=False)
1378
1379     @staticmethod
1380     def _extract_urls(webpage):
1381         # Embedded YouTube player
1382         entries = [
1383             unescapeHTML(mobj.group('url'))
1384             for mobj in re.finditer(r'''(?x)
1385             (?:
1386                 <iframe[^>]+?src=|
1387                 data-video-url=|
1388                 <embed[^>]+?src=|
1389                 embedSWF\(?:\s*|
1390                 <object[^>]+data=|
1391                 new\s+SWFObject\(
1392             )
1393             (["\'])
1394                 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1395                 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
1396             \1''', webpage)]
1397
1398         # lazyYT YouTube embed
1399         entries.extend(list(map(
1400             unescapeHTML,
1401             re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))
1402
1403         # Wordpress "YouTube Video Importer" plugin
1404         matches = re.findall(r'''(?x)<div[^>]+
1405             class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
1406             data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
1407         entries.extend(m[-1] for m in matches)
1408
1409         return entries
1410
1411     @staticmethod
1412     def _extract_url(webpage):
1413         urls = YoutubeIE._extract_urls(webpage)
1414         return urls[0] if urls else None
1415
1416     @classmethod
1417     def extract_id(cls, url):
1418         mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
1419         if mobj is None:
1420             raise ExtractorError('Invalid URL: %s' % url)
1421         video_id = mobj.group(2)
1422         return video_id
1423
1424     def _extract_annotations(self, video_id):
1425         url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id
1426         return self._download_webpage(url, video_id, note='Searching for annotations.', errnote='Unable to download video annotations.')
1427
1428     @staticmethod
1429     def _extract_chapters(description, duration):
1430         if not description:
1431             return None
1432         chapter_lines = re.findall(
1433             r'(?:^|<br\s*/>)([^<]*<a[^>]+onclick=["\']yt\.www\.watch\.player\.seekTo[^>]+>(\d{1,2}:\d{1,2}(?::\d{1,2})?)</a>[^>]*)(?=$|<br\s*/>)',
1434             description)
1435         if not chapter_lines:
1436             return None
1437         chapters = []
1438         for next_num, (chapter_line, time_point) in enumerate(
1439                 chapter_lines, start=1):
1440             start_time = parse_duration(time_point)
1441             if start_time is None:
1442                 continue
1443             if start_time > duration:
1444                 break
1445             end_time = (duration if next_num == len(chapter_lines)
1446                         else parse_duration(chapter_lines[next_num][1]))
1447             if end_time is None:
1448                 continue
1449             if end_time > duration:
1450                 end_time = duration
1451             if start_time > end_time:
1452                 break
1453             chapter_title = re.sub(
1454                 r'<a[^>]+>[^<]+</a>', '', chapter_line).strip(' \t-')
1455             chapter_title = re.sub(r'\s+', ' ', chapter_title)
1456             chapters.append({
1457                 'start_time': start_time,
1458                 'end_time': end_time,
1459                 'title': chapter_title,
1460             })
1461         return chapters
1462
1463     def _real_extract(self, url):
1464         url, smuggled_data = unsmuggle_url(url, {})
1465
1466         proto = (
1467             'http' if self._downloader.params.get('prefer_insecure', False)
1468             else 'https')
1469
1470         start_time = None
1471         end_time = None
1472         parsed_url = compat_urllib_parse_urlparse(url)
1473         for component in [parsed_url.fragment, parsed_url.query]:
1474             query = compat_parse_qs(component)
1475             if start_time is None and 't' in query:
1476                 start_time = parse_duration(query['t'][0])
1477             if start_time is None and 'start' in query:
1478                 start_time = parse_duration(query['start'][0])
1479             if end_time is None and 'end' in query:
1480                 end_time = parse_duration(query['end'][0])
1481
1482         # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1483         mobj = re.search(self._NEXT_URL_RE, url)
1484         if mobj:
1485             url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
1486         video_id = self.extract_id(url)
1487
1488         # Get video webpage
1489         url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
1490         video_webpage = self._download_webpage(url, video_id)
1491
1492         # Attempt to extract SWF player URL
1493         mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
1494         if mobj is not None:
1495             player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1496         else:
1497             player_url = None
1498
1499         dash_mpds = []
1500
1501         def add_dash_mpd(video_info):
1502             dash_mpd = video_info.get('dashmpd')
1503             if dash_mpd and dash_mpd[0] not in dash_mpds:
1504                 dash_mpds.append(dash_mpd[0])
1505
1506         is_live = None
1507         view_count = None
1508
1509         def extract_view_count(v_info):
1510             return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
1511
1512         # Get video info
1513         embed_webpage = None
1514         if re.search(r'player-age-gate-content">', video_webpage) is not None:
1515             age_gate = True
1516             # We simulate the access to the video from www.youtube.com/v/{video_id}
1517             # this can be viewed without login into Youtube
1518             url = proto + '://www.youtube.com/embed/%s' % video_id
1519             embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
1520             data = compat_urllib_parse_urlencode({
1521                 'video_id': video_id,
1522                 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1523                 'sts': self._search_regex(
1524                     r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
1525             })
1526             video_info_url = proto + '://www.youtube.com/get_video_info?' + data
1527             video_info_webpage = self._download_webpage(
1528                 video_info_url, video_id,
1529                 note='Refetching age-gated info webpage',
1530                 errnote='unable to download video info webpage')
1531             video_info = compat_parse_qs(video_info_webpage)
1532             add_dash_mpd(video_info)
1533         else:
1534             age_gate = False
1535             video_info = None
1536             sts = None
1537             # Try looking directly into the video webpage
1538             ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
1539             if ytplayer_config:
1540                 args = ytplayer_config['args']
1541                 if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
1542                     # Convert to the same format returned by compat_parse_qs
1543                     video_info = dict((k, [v]) for k, v in args.items())
1544                     add_dash_mpd(video_info)
1545                 # Rental video is not rented but preview is available (e.g.
1546                 # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
1547                 # https://github.com/rg3/youtube-dl/issues/10532)
1548                 if not video_info and args.get('ypc_vid'):
1549                     return self.url_result(
1550                         args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
1551                 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1552                     is_live = True
1553                 sts = ytplayer_config.get('sts')
1554             if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
1555                 # We also try looking in get_video_info since it may contain different dashmpd
1556                 # URL that points to a DASH manifest with possibly different itag set (some itags
1557                 # are missing from DASH manifest pointed by webpage's dashmpd, some - from DASH
1558                 # manifest pointed by get_video_info's dashmpd).
1559                 # The general idea is to take a union of itags of both DASH manifests (for example
1560                 # video with such 'manifest behavior' see https://github.com/rg3/youtube-dl/issues/6093)
1561                 self.report_video_info_webpage_download(video_id)
1562                 for el in ('info', 'embedded', 'detailpage', 'vevo', ''):
1563                     query = {
1564                         'video_id': video_id,
1565                         'ps': 'default',
1566                         'eurl': '',
1567                         'gl': 'US',
1568                         'hl': 'en',
1569                     }
1570                     if el:
1571                         query['el'] = el
1572                     if sts:
1573                         query['sts'] = sts
1574                     video_info_webpage = self._download_webpage(
1575                         '%s://www.youtube.com/get_video_info' % proto,
1576                         video_id, note=False,
1577                         errnote='unable to download video info webpage',
1578                         fatal=False, query=query)
1579                     if not video_info_webpage:
1580                         continue
1581                     get_video_info = compat_parse_qs(video_info_webpage)
1582                     add_dash_mpd(get_video_info)
1583                     if view_count is None:
1584                         view_count = extract_view_count(get_video_info)
1585                     if not video_info:
1586                         video_info = get_video_info
1587                     if 'token' in get_video_info:
1588                         # Different get_video_info requests may report different results, e.g.
1589                         # some may report video unavailability, but some may serve it without
1590                         # any complaint (see https://github.com/rg3/youtube-dl/issues/7362,
1591                         # the original webpage as well as el=info and el=embedded get_video_info
1592                         # requests report video unavailability due to geo restriction while
1593                         # el=detailpage succeeds and returns valid data). This is probably
1594                         # due to YouTube measures against IP ranges of hosting providers.
1595                         # Working around by preferring the first succeeded video_info containing
1596                         # the token if no such video_info yet was found.
1597                         if 'token' not in video_info:
1598                             video_info = get_video_info
1599                         break
1600
1601         def extract_unavailable_message():
1602             return self._html_search_regex(
1603                 r'(?s)<h1[^>]+id="unavailable-message"[^>]*>(.+?)</h1>',
1604                 video_webpage, 'unavailable message', default=None)
1605
1606         if 'token' not in video_info:
1607             if 'reason' in video_info:
1608                 if 'The uploader has not made this video available in your country.' in video_info['reason']:
1609                     regions_allowed = self._html_search_meta(
1610                         'regionsAllowed', video_webpage, default=None)
1611                     countries = regions_allowed.split(',') if regions_allowed else None
1612                     self.raise_geo_restricted(
1613                         msg=video_info['reason'][0], countries=countries)
1614                 reason = video_info['reason'][0]
1615                 if 'Invalid parameters' in reason:
1616                     unavailable_message = extract_unavailable_message()
1617                     if unavailable_message:
1618                         reason = unavailable_message
1619                 raise ExtractorError(
1620                     'YouTube said: %s' % reason,
1621                     expected=True, video_id=video_id)
1622             else:
1623                 raise ExtractorError(
1624                     '"token" parameter not in video info for unknown reason',
1625                     video_id=video_id)
1626
1627         # title
1628         if 'title' in video_info:
1629             video_title = video_info['title'][0]
1630         else:
1631             self._downloader.report_warning('Unable to extract video title')
1632             video_title = '_'
1633
1634         # description
1635         description_original = video_description = get_element_by_id("eow-description", video_webpage)
1636         if video_description:
1637
1638             def replace_url(m):
1639                 redir_url = compat_urlparse.urljoin(url, m.group(1))
1640                 parsed_redir_url = compat_urllib_parse_urlparse(redir_url)
1641                 if re.search(r'^(?:www\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)$', parsed_redir_url.netloc) and parsed_redir_url.path == '/redirect':
1642                     qs = compat_parse_qs(parsed_redir_url.query)
1643                     q = qs.get('q')
1644                     if q and q[0]:
1645                         return q[0]
1646                 return redir_url
1647
1648             description_original = video_description = re.sub(r'''(?x)
1649                 <a\s+
1650                     (?:[a-zA-Z-]+="[^"]*"\s+)*?
1651                     (?:title|href)="([^"]+)"\s+
1652                     (?:[a-zA-Z-]+="[^"]*"\s+)*?
1653                     class="[^"]*"[^>]*>
1654                 [^<]+\.{3}\s*
1655                 </a>
1656             ''', replace_url, video_description)
1657             video_description = clean_html(video_description)
1658         else:
1659             fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
1660             if fd_mobj:
1661                 video_description = unescapeHTML(fd_mobj.group(1))
1662             else:
1663                 video_description = ''
1664
1665         if 'multifeed_metadata_list' in video_info and not smuggled_data.get('force_singlefeed', False):
1666             if not self._downloader.params.get('noplaylist'):
1667                 entries = []
1668                 feed_ids = []
1669                 multifeed_metadata_list = video_info['multifeed_metadata_list'][0]
1670                 for feed in multifeed_metadata_list.split(','):
1671                     # Unquote should take place before split on comma (,) since textual
1672                     # fields may contain comma as well (see
1673                     # https://github.com/rg3/youtube-dl/issues/8536)
1674                     feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
1675                     entries.append({
1676                         '_type': 'url_transparent',
1677                         'ie_key': 'Youtube',
1678                         'url': smuggle_url(
1679                             '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
1680                             {'force_singlefeed': True}),
1681                         'title': '%s (%s)' % (video_title, feed_data['title'][0]),
1682                     })
1683                     feed_ids.append(feed_data['id'][0])
1684                 self.to_screen(
1685                     'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
1686                     % (', '.join(feed_ids), video_id))
1687                 return self.playlist_result(entries, video_id, video_title, video_description)
1688             self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
1689
1690         if view_count is None:
1691             view_count = extract_view_count(video_info)
1692
1693         # Check for "rental" videos
1694         if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
1695             raise ExtractorError('"rental" videos not supported. See https://github.com/rg3/youtube-dl/issues/359 for more information.', expected=True)
1696
1697         # Start extracting information
1698         self.report_information_extraction(video_id)
1699
1700         # uploader
1701         video_uploader = try_get(video_info, lambda x: x['author'][0], compat_str)
1702         if video_uploader:
1703             video_uploader = compat_urllib_parse_unquote_plus(video_uploader)
1704         else:
1705             self._downloader.report_warning('unable to extract uploader name')
1706
1707         # uploader_id
1708         video_uploader_id = None
1709         video_uploader_url = None
1710         mobj = re.search(
1711             r'<link itemprop="url" href="(?P<uploader_url>https?://www\.youtube\.com/(?:user|channel)/(?P<uploader_id>[^"]+))">',
1712             video_webpage)
1713         if mobj is not None:
1714             video_uploader_id = mobj.group('uploader_id')
1715             video_uploader_url = mobj.group('uploader_url')
1716         else:
1717             self._downloader.report_warning('unable to extract uploader nickname')
1718
1719         # thumbnail image
1720         # We try first to get a high quality image:
1721         m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
1722                             video_webpage, re.DOTALL)
1723         if m_thumb is not None:
1724             video_thumbnail = m_thumb.group(1)
1725         elif 'thumbnail_url' not in video_info:
1726             self._downloader.report_warning('unable to extract video thumbnail')
1727             video_thumbnail = None
1728         else:   # don't panic if we can't find it
1729             video_thumbnail = compat_urllib_parse_unquote_plus(video_info['thumbnail_url'][0])
1730
1731         # upload date
1732         upload_date = self._html_search_meta(
1733             'datePublished', video_webpage, 'upload date', default=None)
1734         if not upload_date:
1735             upload_date = self._search_regex(
1736                 [r'(?s)id="eow-date.*?>(.*?)</span>',
1737                  r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'],
1738                 video_webpage, 'upload date', default=None)
1739         upload_date = unified_strdate(upload_date)
1740
1741         video_license = self._html_search_regex(
1742             r'<h4[^>]+class="title"[^>]*>\s*License\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li',
1743             video_webpage, 'license', default=None)
1744
1745         m_music = re.search(
1746             r'''(?x)
1747                 <h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*
1748                 <ul[^>]*>\s*
1749                 <li>(?P<title>.+?)
1750                 by (?P<creator>.+?)
1751                 (?:
1752                     \(.+?\)|
1753                     <a[^>]*
1754                         (?:
1755                             \bhref=["\']/red[^>]*>|             # drop possible
1756                             >\s*Listen ad-free with YouTube Red # YouTube Red ad
1757                         )
1758                     .*?
1759                 )?</li
1760             ''',
1761             video_webpage)
1762         if m_music:
1763             video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
1764             video_creator = clean_html(m_music.group('creator'))
1765         else:
1766             video_alt_title = video_creator = None
1767
1768         m_episode = re.search(
1769             r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
1770             video_webpage)
1771         if m_episode:
1772             series = m_episode.group('series')
1773             season_number = int(m_episode.group('season'))
1774             episode_number = int(m_episode.group('episode'))
1775         else:
1776             series = season_number = episode_number = None
1777
1778         m_cat_container = self._search_regex(
1779             r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
1780             video_webpage, 'categories', default=None)
1781         if m_cat_container:
1782             category = self._html_search_regex(
1783                 r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
1784                 default=None)
1785             video_categories = None if category is None else [category]
1786         else:
1787             video_categories = None
1788
1789         video_tags = [
1790             unescapeHTML(m.group('content'))
1791             for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
1792
1793         def _extract_count(count_name):
1794             return str_to_int(self._search_regex(
1795                 r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>'
1796                 % re.escape(count_name),
1797                 video_webpage, count_name, default=None))
1798
1799         like_count = _extract_count('like')
1800         dislike_count = _extract_count('dislike')
1801
1802         # subtitles
1803         video_subtitles = self.extract_subtitles(video_id, video_webpage)
1804         automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
1805
1806         video_duration = try_get(
1807             video_info, lambda x: int_or_none(x['length_seconds'][0]))
1808         if not video_duration:
1809             video_duration = parse_duration(self._html_search_meta(
1810                 'duration', video_webpage, 'video duration'))
1811
1812         # annotations
1813         video_annotations = None
1814         if self._downloader.params.get('writeannotations', False):
1815             video_annotations = self._extract_annotations(video_id)
1816
1817         chapters = self._extract_chapters(description_original, video_duration)
1818
1819         def _extract_filesize(media_url):
1820             return int_or_none(self._search_regex(
1821                 r'\bclen[=/](\d+)', media_url, 'filesize', default=None))
1822
1823         if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1824             self.report_rtmp_download()
1825             formats = [{
1826                 'format_id': '_rtmp',
1827                 'protocol': 'rtmp',
1828                 'url': video_info['conn'][0],
1829                 'player_url': player_url,
1830             }]
1831         elif not is_live and (len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1):
1832             encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
1833             if 'rtmpe%3Dyes' in encoded_url_map:
1834                 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
1835             formats_spec = {}
1836             fmt_list = video_info.get('fmt_list', [''])[0]
1837             if fmt_list:
1838                 for fmt in fmt_list.split(','):
1839                     spec = fmt.split('/')
1840                     if len(spec) > 1:
1841                         width_height = spec[1].split('x')
1842                         if len(width_height) == 2:
1843                             formats_spec[spec[0]] = {
1844                                 'resolution': spec[1],
1845                                 'width': int_or_none(width_height[0]),
1846                                 'height': int_or_none(width_height[1]),
1847                             }
1848             q = qualities(['small', 'medium', 'hd720'])
1849             formats = []
1850             for url_data_str in encoded_url_map.split(','):
1851                 url_data = compat_parse_qs(url_data_str)
1852                 if 'itag' not in url_data or 'url' not in url_data:
1853                     continue
1854                 format_id = url_data['itag'][0]
1855                 url = url_data['url'][0]
1856
1857                 if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
1858                     ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
1859                     jsplayer_url_json = self._search_regex(
1860                         ASSETS_RE,
1861                         embed_webpage if age_gate else video_webpage,
1862                         'JS player URL (1)', default=None)
1863                     if not jsplayer_url_json and not age_gate:
1864                         # We need the embed website after all
1865                         if embed_webpage is None:
1866                             embed_url = proto + '://www.youtube.com/embed/%s' % video_id
1867                             embed_webpage = self._download_webpage(
1868                                 embed_url, video_id, 'Downloading embed webpage')
1869                         jsplayer_url_json = self._search_regex(
1870                             ASSETS_RE, embed_webpage, 'JS player URL')
1871
1872                     player_url = json.loads(jsplayer_url_json)
1873                     if player_url is None:
1874                         player_url_json = self._search_regex(
1875                             r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
1876                             video_webpage, 'age gate player URL')
1877                         player_url = json.loads(player_url_json)
1878
1879                 if 'sig' in url_data:
1880                     url += '&signature=' + url_data['sig'][0]
1881                 elif 's' in url_data:
1882                     encrypted_sig = url_data['s'][0]
1883
1884                     if self._downloader.params.get('verbose'):
1885                         if player_url is None:
1886                             player_version = 'unknown'
1887                             player_desc = 'unknown'
1888                         else:
1889                             if player_url.endswith('swf'):
1890                                 player_version = self._search_regex(
1891                                     r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
1892                                     'flash player', fatal=False)
1893                                 player_desc = 'flash player %s' % player_version
1894                             else:
1895                                 player_version = self._search_regex(
1896                                     [r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js',
1897                                      r'(?:www|player)-([^/]+)(?:/[a-z]{2}_[A-Z]{2})?/base\.js'],
1898                                     player_url,
1899                                     'html5 player', fatal=False)
1900                                 player_desc = 'html5 player %s' % player_version
1901
1902                         parts_sizes = self._signature_cache_id(encrypted_sig)
1903                         self.to_screen('{%s} signature length %s, %s' %
1904                                        (format_id, parts_sizes, player_desc))
1905
1906                     signature = self._decrypt_signature(
1907                         encrypted_sig, video_id, player_url, age_gate)
1908                     url += '&signature=' + signature
1909                 if 'ratebypass' not in url:
1910                     url += '&ratebypass=yes'
1911
1912                 dct = {
1913                     'format_id': format_id,
1914                     'url': url,
1915                     'player_url': player_url,
1916                 }
1917                 if format_id in self._formats:
1918                     dct.update(self._formats[format_id])
1919                 if format_id in formats_spec:
1920                     dct.update(formats_spec[format_id])
1921
1922                 # Some itags are not included in DASH manifest thus corresponding formats will
1923                 # lack metadata (see https://github.com/rg3/youtube-dl/pull/5993).
1924                 # Trying to extract metadata from url_encoded_fmt_stream_map entry.
1925                 mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
1926                 width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
1927
1928                 filesize = int_or_none(url_data.get(
1929                     'clen', [None])[0]) or _extract_filesize(url)
1930
1931                 quality = url_data.get('quality_label', [None])[0] or url_data.get('quality', [None])[0]
1932
1933                 more_fields = {
1934                     'filesize': filesize,
1935                     'tbr': float_or_none(url_data.get('bitrate', [None])[0], 1000),
1936                     'width': width,
1937                     'height': height,
1938                     'fps': int_or_none(url_data.get('fps', [None])[0]),
1939                     'format_note': quality,
1940                     'quality': q(quality),
1941                 }
1942                 for key, value in more_fields.items():
1943                     if value:
1944                         dct[key] = value
1945                 type_ = url_data.get('type', [None])[0]
1946                 if type_:
1947                     type_split = type_.split(';')
1948                     kind_ext = type_split[0].split('/')
1949                     if len(kind_ext) == 2:
1950                         kind, _ = kind_ext
1951                         dct['ext'] = mimetype2ext(type_split[0])
1952                         if kind in ('audio', 'video'):
1953                             codecs = None
1954                             for mobj in re.finditer(
1955                                     r'(?P<key>[a-zA-Z_-]+)=(?P<quote>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):
1956                                 if mobj.group('key') == 'codecs':
1957                                     codecs = mobj.group('val')
1958                                     break
1959                             if codecs:
1960                                 dct.update(parse_codecs(codecs))
1961                 if dct.get('acodec') == 'none' or dct.get('vcodec') == 'none':
1962                     dct['downloader_options'] = {
1963                         # Youtube throttles chunks >~10M
1964                         'http_chunk_size': 10485760,
1965                     }
1966                 formats.append(dct)
1967         elif video_info.get('hlsvp'):
1968             manifest_url = video_info['hlsvp'][0]
1969             formats = []
1970             m3u8_formats = self._extract_m3u8_formats(
1971                 manifest_url, video_id, 'mp4', fatal=False)
1972             for a_format in m3u8_formats:
1973                 itag = self._search_regex(
1974                     r'/itag/(\d+)/', a_format['url'], 'itag', default=None)
1975                 if itag:
1976                     a_format['format_id'] = itag
1977                     if itag in self._formats:
1978                         dct = self._formats[itag].copy()
1979                         dct.update(a_format)
1980                         a_format = dct
1981                 a_format['player_url'] = player_url
1982                 # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
1983                 a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
1984                 formats.append(a_format)
1985         else:
1986             error_message = clean_html(video_info.get('reason', [None])[0])
1987             if not error_message:
1988                 error_message = extract_unavailable_message()
1989             if error_message:
1990                 raise ExtractorError(error_message, expected=True)
1991             raise ExtractorError('no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
1992
1993         # Look for the DASH manifest
1994         if self._downloader.params.get('youtube_include_dash_manifest', True):
1995             dash_mpd_fatal = True
1996             for mpd_url in dash_mpds:
1997                 dash_formats = {}
1998                 try:
1999                     def decrypt_sig(mobj):
2000                         s = mobj.group(1)
2001                         dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
2002                         return '/signature/%s' % dec_s
2003
2004                     mpd_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, mpd_url)
2005
2006                     for df in self._extract_mpd_formats(
2007                             mpd_url, video_id, fatal=dash_mpd_fatal,
2008                             formats_dict=self._formats):
2009                         if not df.get('filesize'):
2010                             df['filesize'] = _extract_filesize(df['url'])
2011                         # Do not overwrite DASH format found in some previous DASH manifest
2012                         if df['format_id'] not in dash_formats:
2013                             dash_formats[df['format_id']] = df
2014                         # Additional DASH manifests may end up in HTTP Error 403 therefore
2015                         # allow them to fail without bug report message if we already have
2016                         # some DASH manifest succeeded. This is temporary workaround to reduce
2017                         # burst of bug reports until we figure out the reason and whether it
2018                         # can be fixed at all.
2019                         dash_mpd_fatal = False
2020                 except (ExtractorError, KeyError) as e:
2021                     self.report_warning(
2022                         'Skipping DASH manifest: %r' % e, video_id)
2023                 if dash_formats:
2024                     # Remove the formats we found through non-DASH, they
2025                     # contain less info and it can be wrong, because we use
2026                     # fixed values (for example the resolution). See
2027                     # https://github.com/rg3/youtube-dl/issues/5774 for an
2028                     # example.
2029                     formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
2030                     formats.extend(dash_formats.values())
2031
2032         # Check for malformed aspect ratio
2033         stretched_m = re.search(
2034             r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
2035             video_webpage)
2036         if stretched_m:
2037             w = float(stretched_m.group('w'))
2038             h = float(stretched_m.group('h'))
2039             # yt:stretch may hold invalid ratio data (e.g. for Q39EVAstoRM ratio is 17:0).
2040             # We will only process correct ratios.
2041             if w > 0 and h > 0:
2042                 ratio = w / h
2043                 for f in formats:
2044                     if f.get('vcodec') != 'none':
2045                         f['stretched_ratio'] = ratio
2046
2047         self._sort_formats(formats)
2048
2049         self.mark_watched(video_id, video_info)
2050
2051         return {
2052             'id': video_id,
2053             'uploader': video_uploader,
2054             'uploader_id': video_uploader_id,
2055             'uploader_url': video_uploader_url,
2056             'upload_date': upload_date,
2057             'license': video_license,
2058             'creator': video_creator,
2059             'title': video_title,
2060             'alt_title': video_alt_title,
2061             'thumbnail': video_thumbnail,
2062             'description': video_description,
2063             'categories': video_categories,
2064             'tags': video_tags,
2065             'subtitles': video_subtitles,
2066             'automatic_captions': automatic_captions,
2067             'duration': video_duration,
2068             'age_limit': 18 if age_gate else 0,
2069             'annotations': video_annotations,
2070             'chapters': chapters,
2071             'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
2072             'view_count': view_count,
2073             'like_count': like_count,
2074             'dislike_count': dislike_count,
2075             'average_rating': float_or_none(video_info.get('avg_rating', [None])[0]),
2076             'formats': formats,
2077             'is_live': is_live,
2078             'start_time': start_time,
2079             'end_time': end_time,
2080             'series': series,
2081             'season_number': season_number,
2082             'episode_number': episode_number,
2083         }
2084
2085
class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
    """Extractor for YouTube playlists, including auto-generated mixes.

    Matches playlist/watch/embed URLs that carry a ``p``/``a``/``list``
    query parameter, ``/p/<id>`` URLs, ``youtu.be`` links with ``list=`` and
    bare playlist ids.
    """
    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r"""(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            youtube\.com/
                            (?:
                               (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/(?:videoseries|[0-9A-Za-z_-]{11}))
                               \? (?:.*?[&;])*? (?:p|a|list)=
                            |  p/
                            )|
                            youtu\.be/[0-9A-Za-z_-]{11}\?.*?\blist=
                        )
                        (
                            (?:PL|LL|EC|UU|FL|RD|UL|TL)?[0-9A-Za-z-_]{10,}
                            # Top tracks, they can also include dots
                            |(?:MC)[\w\.]*
                        )
                        .*
                     |
                        (%(playlist_id)s)
                     )""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&amp;[^"]*?index=(?P<index>\d+)(?:[^>]+>(?P<title>[^<]+))?'
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
        'info_dict': {
            'title': 'ytdl test PL',
            'id': 'PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
        },
        'playlist_count': 3,
    }, {
        'url': 'https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
        'info_dict': {
            'id': 'PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
            'title': 'YDL_Empty_List',
        },
        'playlist_count': 0,
        'skip': 'This playlist is private',
    }, {
        'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
        'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
        'info_dict': {
            'title': '29C3: Not my department',
            'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
        },
        'playlist_count': 95,
    }, {
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
        },
        'playlist_mincount': 26,
    }, {
        'note': 'Large playlist',
        'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
        'info_dict': {
            'title': 'Uploads from Cauchemar',
            'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
        },
        'playlist_mincount': 799,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 485,
        'info_dict': {
            'title': '2017 華語最新單曲 (2/24更新)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        }
    }, {
        'note': 'Embedded SWF player',
        'url': 'https://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA7',
            'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ',
        }
    }, {
        'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
        'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
        'info_dict': {
            'title': 'Uploads from Interstellar Movie',
            'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
        },
        'playlist_mincount': 21,
    }, {
        # Playlist URL that does not actually serve a playlist
        'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
        'info_dict': {
            'id': 'FqZTN594JQw',
            'ext': 'webm',
            'title': "Smiley's People 01 detective, Adventure Series, Action",
            'uploader': 'STREEM',
            'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
            'upload_date': '20150526',
            'license': 'Standard YouTube License',
            'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
            'categories': ['People & Blogs'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'skip_download': True,
        },
        'add_ie': [YoutubeIE.ie_key()],
    }, {
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'license': 'Standard YouTube License',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }]

    def _real_initialize(self):
        """Log in before any extraction takes place."""
        self._login()

    def _extract_mix(self, playlist_id):
        """Collect the videos of a mix by crawling its watch pages.

        Each iteration loads the watch page of the most recently found video
        (starting from the last 11 characters of ``playlist_id``) and gathers
        the video ids linked with this playlist id.  Crawling stops as soon
        as a page contributes no id that has not been seen before.

        Returns the playlist result built from the collected ids, titled
        with the first title-like element found on the last downloaded page.
        """
        # The mixes are generated from a single video
        # the id of the playlist is just 'RD' + video_id
        mix_entry_re = r'''(?xs)data-video-username=".*?".*?
                           href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id)

        collected = []
        seed_id = playlist_id[-11:]
        page_num = 0
        while True:
            page_num += 1
            webpage = self._download_webpage(
                'https://youtube.com/watch?v=%s&list=%s' % (seed_id, playlist_id),
                playlist_id,
                'Downloading page {0} of Youtube mix'.format(page_num))
            # Fetch new pages until all the videos are repeated, it seems that
            # there are always 51 unique videos.
            fresh = [
                candidate
                for candidate in orderedSet(re.findall(mix_entry_re, webpage))
                if candidate not in collected]
            if not fresh:
                break
            collected += fresh
            seed_id = collected[-1]

        url_results = self._ids_to_results(collected)

        # Try the known title containers in order of preference; when none
        # matches, the outcome of the last lookup is passed on unchanged.
        title_span = None
        for class_name in ('playlist-title', 'title long-title', 'title'):
            title_span = get_element_by_attribute('class', class_name, webpage)
            if title_span:
                break

        return self.playlist_result(
            url_results, playlist_id, clean_html(title_span))

    def _extract_playlist(self, playlist_id):
        """Download the playlist page and build the playlist result.

        Returns a ``(has_videos, playlist)`` tuple.  ``has_videos`` is False
        only when the page has no playlist title and yields no entry at all.

        Raises ExtractorError (expected) when the page carries an alert
        saying the playlist does not exist, is private, or that the
        parameters are invalid.
        """
        playlist_url = self._TEMPLATE_URL % playlist_id
        page = self._download_webpage(playlist_url, playlist_id)

        # the yt-alert-message now has tabindex attribute (see https://github.com/rg3/youtube-dl/issues/11604)
        for raw_alert in re.findall(r'<div class="yt-alert-message"[^>]*>([^<]+)</div>', page):
            alert = raw_alert.strip()
            # Check if the playlist exists or is private
            unavailable = re.match(
                r'[^<]*(?:The|This) playlist (?P<reason>does not exist|is private)[^<]*', alert)
            if unavailable:
                reason = unavailable.group('reason')
                message = 'This playlist %s' % reason
                if 'private' in reason:
                    message += ', use --username or --netrc to access it'
                raise ExtractorError(message + '.', expected=True)
            if re.match(r'[^<]*Invalid parameters[^<]*', alert):
                raise ExtractorError(
                    'Invalid parameters. Maybe URL is incorrect.',
                    expected=True)
            if re.match(r'[^<]*Choose your language[^<]*', alert):
                # Language prompt is harmless; do not warn about it
                continue
            self.report_warning('Youtube gives an alert message: ' + alert)

        playlist_title = self._html_search_regex(
            r'(?s)<h1 class="pl-header-title[^"]*"[^>]*>\s*(.*?)\s*</h1>',
            page, 'title', default=None)

        # Common prefix of the uploader link inside the playlist header
        uploader_link_prefix = r'class=["\']pl-header-details[^>]+>\s*<li>\s*<a[^>]+\bhref='
        uploader = self._search_regex(
            r'%s["\']/(?:user|channel)/[^>]+>([^<]+)' % uploader_link_prefix,
            page, 'uploader', default=None)

        uploader_id = uploader_url = None
        uploader_link = re.search(
            r'%s(["\'])(?P<path>/(?:user|channel)/(?P<uploader_id>.+?))\1' % uploader_link_prefix,
            page)
        if uploader_link:
            uploader_id = uploader_link.group('uploader_id')
            uploader_url = compat_urlparse.urljoin(
                playlist_url, uploader_link.group('path'))

        if playlist_title:
            has_videos = True
        else:
            try:
                # Some playlist URLs don't actually serve a playlist (e.g.
                # https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4)
                next(self._entries(page, playlist_id))
            except StopIteration:
                has_videos = False
            else:
                has_videos = True

        playlist = self.playlist_result(
            self._entries(page, playlist_id), playlist_id, playlist_title)
        playlist['uploader'] = uploader
        playlist['uploader_id'] = uploader_id
        playlist['uploader_url'] = uploader_url

        return has_videos, playlist

    def _check_download_just_video(self, url, playlist_id):
        """Detect a video id in ``url`` and honour the ``noplaylist`` option.

        Returns ``(video_id, result)``:
        * ``(None, None)`` when the URL does not point at a specific video;
        * ``(video_id, url_result)`` when ``noplaylist`` is set, so only the
          video should be downloaded;
        * ``(video_id, None)`` when the playlist should be downloaded.
        """
        # Check if it's a video-specific URL
        query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
        video_id = query.get('v', [None])[0]
        if not video_id:
            video_id = self._search_regex(
                r'(?:(?:^|//)youtu\.be/|youtube\.com/embed/(?!videoseries))([0-9A-Za-z_-]{11})', url,
                'video id', default=None)
        if not video_id:
            return None, None

        if self._downloader.params.get('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
            return video_id, self.url_result(video_id, 'Youtube', video_id=video_id)

        self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
        return video_id, None

    def _real_extract(self, url):
        """Dispatch ``url`` to single-video, mix or regular playlist extraction."""
        # Extract playlist id
        url_match = re.match(self._VALID_URL, url)
        if url_match is None:
            raise ExtractorError('Invalid URL: %s' % url)
        playlist_id = url_match.group(1) or url_match.group(2)

        video_id, video = self._check_download_just_video(url, playlist_id)
        if video:
            return video

        if playlist_id.startswith(('RD', 'UL', 'PU')):
            # Mixes require a custom extraction process
            return self._extract_mix(playlist_id)

        has_videos, playlist = self._extract_playlist(playlist_id)
        if not has_videos and video_id:
            # Some playlist URLs don't actually serve a playlist (see
            # https://github.com/rg3/youtube-dl/issues/10537).
            # Fallback to plain video extraction if there is a video id
            # along with playlist id.
            return self.url_result(video_id, 'Youtube', video_id=video_id)

        return playlist
2377
2378
class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
    """Extractor for the videos of a YouTube channel (``/channel/<id>`` URLs)."""
    IE_DESC = 'YouTube.com channels'
    _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/(?P<id>[0-9A-Za-z_-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos'
    _VIDEO_RE = r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?'
    IE_NAME = 'youtube:channel'
    _TESTS = [{
        'note': 'paginated channel',
        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
        'playlist_mincount': 91,
        'info_dict': {
            'id': 'UUKfVa3S1e4PHvxWcwyMMg8w',
            'title': 'Uploads from lex will',
        }
    }, {
        'note': 'Age restricted channel',
        # from https://www.youtube.com/user/DeusExOfficial
        'url': 'https://www.youtube.com/channel/UCs0ifCMCm1icqRbqhUINa0w',
        'playlist_mincount': 64,
        'info_dict': {
            'id': 'UUs0ifCMCm1icqRbqhUINa0w',
            'title': 'Uploads from Deus Ex',
        },
    }]

    @classmethod
    def suitable(cls, url):
        """Reject URLs claimed by the playlists or live extractors."""
        if YoutubePlaylistsIE.suitable(url) or YoutubeLiveIE.suitable(url):
            return False
        return super(YoutubeChannelIE, cls).suitable(url)

    def _build_template_url(self, url, channel_id):
        """Return the videos page URL for ``channel_id`` (``url`` is unused here)."""
        return self._TEMPLATE_URL % channel_id

    def _real_extract(self, url):
        """Extract a channel's videos.

        When an id starting with 'UC' can be read from the channel page, the
        request is delegated to the corresponding 'UU' uploads playlist.
        Otherwise the videos page itself is scraped: in one pass for
        auto-generated channels, through ``_entries`` for all others.

        Raises ExtractorError (expected) when the page yields no entries and
        shows an alert message.
        """
        channel_id = self._match_id(url)

        videos_url = self._build_template_url(url, channel_id)

        # Channel by page listing is restricted to 35 pages of 30 items, i.e. 1050 videos total (see #5778)
        # Workaround by extracting as a playlist if managed to obtain channel playlist URL
        # otherwise fallback on channel by page extraction
        channel_page = self._download_webpage(
            videos_url + '?view=57', channel_id,
            'Downloading channel page', fatal=False)

        # Stays False when the (non-fatal) download above failed
        channel_playlist_id = False
        if channel_page is not False:
            channel_playlist_id = self._html_search_meta(
                'channelId', channel_page, 'channel id', default=None)
            if not channel_playlist_id:
                # Fall back to the app deep-link meta tags
                channel_url = self._html_search_meta(
                    ('al:ios:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad'),
                    channel_page, 'channel url', default=None)
                if channel_url:
                    channel_playlist_id = self._search_regex(
                        r'vnd\.youtube://user/([0-9A-Za-z_-]+)',
                        channel_url, 'channel id', default=None)

        if channel_playlist_id and channel_playlist_id.startswith('UC'):
            uploads_id = 'UU' + channel_playlist_id[2:]
            uploads_url = compat_urlparse.urljoin(
                videos_url, '/playlist?list=%s' % uploads_id)
            return self.url_result(uploads_url, 'YoutubePlaylist')

        channel_page = self._download_webpage(videos_url, channel_id, 'Downloading page #1')
        autogenerated_marker = re.search(r'''(?x)
                class="[^"]*?(?:
                    channel-header-autogenerated-label|
                    yt-channel-title-autogenerated
                )[^"]*"''', channel_page)

        if autogenerated_marker is not None:
            # The videos are contained in a single page
            # the ajax pages can't be used, they are empty
            entries = []
            for video_id, video_title in self.extract_videos_from_page(channel_page):
                entries.append(self.url_result(
                    video_id, 'Youtube', video_id=video_id,
                    video_title=video_title))
            return self.playlist_result(entries, channel_id)

        try:
            # Probe for a first entry; an empty channel may carry an alert
            next(self._entries(channel_page, channel_id))
        except StopIteration:
            alert_message = self._html_search_regex(
                r'(?s)<div[^>]+class=(["\']).*?\byt-alert-message\b.*?\1[^>]*>(?P<alert>[^<]+)</div>',
                channel_page, 'alert', default=None, group='alert')
            if alert_message:
                raise ExtractorError('Youtube said: %s' % alert_message, expected=True)

        return self.playlist_result(self._entries(channel_page, channel_id), channel_id)
2468
2469
class YoutubeUserIE(YoutubeChannelIE):
    """Extractor for user videos given a user/custom URL or a ``ytuser:`` keyword."""
    IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:(?P<user>user|c)/)?(?!(?:attribution_link|watch|results|shared)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/%s/%s/videos'
    IE_NAME = 'youtube:user'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheLinuxFoundation',
        'playlist_mincount': 320,
        'info_dict': {
            'id': 'UUfX55Sx5hEFjoC3cNs6mCUQ',
            'title': 'Uploads from The Linux Foundation',
        }
    }, {
        # Only available via https://www.youtube.com/c/12minuteathlete/videos
        # but not https://www.youtube.com/user/12minuteathlete/videos
        'url': 'https://www.youtube.com/c/12minuteathlete/videos',
        'playlist_mincount': 249,
        'info_dict': {
            'id': 'UUVjM-zV6_opMDx7WYxnjZiQ',
            'title': 'Uploads from 12 Minute Athlete',
        }
    }, {
        'url': 'ytuser:phihag',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/gametrailers',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/gametrailers',
        'only_matching': True,
    }, {
        # This channel is not available, geo restricted to JP
        'url': 'https://www.youtube.com/user/kananishinoSMEJ/videos',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Accept ``url`` only if no other Youtube*IE class in this module does."""
        # This extractor's regex is very permissive and would also match
        # URLs that belong to the other youtube extractors, so give every
        # other module-level Youtube*IE class the chance to claim it first.
        for name, klass in globals().items():
            if klass is cls:
                continue
            if not (name.startswith('Youtube') and name.endswith('IE')):
                continue
            if klass.suitable(url):
                return False
        return super(YoutubeUserIE, cls).suitable(url)

    def _build_template_url(self, url, channel_id):
        """Return the videos page URL for the user or custom name in ``url``.

        The path kind ('user' or 'c') is taken from the URL and defaults to
        'user' when absent; ``channel_id`` is not used by this override.
        """
        url_match = re.match(self._VALID_URL, url)
        path_kind = url_match.group('user') or 'user'
        return self._TEMPLATE_URL % (path_kind, url_match.group('id'))
2520
2521
class YoutubeLiveIE(YoutubeBaseInfoExtractor):
    """Extractor for ``.../live`` URLs of a user, channel or custom page."""

    IE_NAME = 'youtube:live'
    IE_DESC = 'YouTube.com live streams'
    _VALID_URL = r'(?P<base_url>https?://(?:\w+\.)?youtube\.com/(?:(?:user|channel|c)/)?(?P<id>[^/]+))/live'

    _TESTS = [
        {
            'url': 'https://www.youtube.com/user/TheYoungTurks/live',
            'info_dict': {
                'id': 'a48o2S1cPoo',
                'ext': 'mp4',
                'title': 'The Young Turks - Live Main Show',
                'uploader': 'The Young Turks',
                'uploader_id': 'TheYoungTurks',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
                'upload_date': '20150715',
                'license': 'Standard YouTube License',
                'description': 'md5:438179573adcdff3c97ebb1ee632b891',
                'categories': ['News & Politics'],
                'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
                'like_count': int,
                'dislike_count': int,
            },
            'params': {
                'skip_download': True,
            },
        },
        {
            'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/TheYoungTurks/live',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Resolve a ``/live`` URL to a single video when possible.

        The page is downloaded non-fatally. If its ``type`` property starts
        with ``video`` and its ``videoId`` meta value looks like an
        11-character video id, the result points at that video via YoutubeIE.
        In every other case the result points at the URL with the trailing
        ``/live`` removed.
        """
        mobj = re.match(self._VALID_URL, url)
        channel_id, base_url = mobj.group('id', 'base_url')

        webpage = self._download_webpage(url, channel_id, fatal=False)
        if not webpage:
            return self.url_result(base_url)

        page_type = self._og_search_property(
            'type', webpage, 'page type', default='')
        video_id = self._html_search_meta(
            'videoId', webpage, 'video id', default=None)

        looks_like_video = (
            page_type.startswith('video')
            and video_id
            and re.match(r'^[0-9A-Za-z_-]{11}$', video_id))
        if looks_like_video:
            return self.url_result(video_id, YoutubeIE.ie_key())
        return self.url_result(base_url)
2572
2573
class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
    """Extractor for the ``/playlists`` page of a user or channel.

    Only configuration is defined here; the extraction logic is inherited.
    """

    IE_NAME = 'youtube:playlists'
    IE_DESC = 'YouTube.com user/channel playlists'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/(?:user|channel)/(?P<id>[^/]+)/playlists'

    _TESTS = [
        {
            'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
            'playlist_mincount': 4,
            'info_dict': {
                'id': 'ThirstForScience',
                'title': 'Thirst for Science',
            },
        },
        {
            # with "Load more" button
            'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
            'playlist_mincount': 70,
            'info_dict': {
                'id': 'igorkle1',
                'title': 'Игорь Клейнер',
            },
        },
        {
            'url': 'https://www.youtube.com/channel/UCiU1dHvZObB2iP6xkJ__Icw/playlists',
            'playlist_mincount': 17,
            'info_dict': {
                'id': 'UCiU1dHvZObB2iP6xkJ__Icw',
                'title': 'Chem Player',
            },
        },
    ]
2602
2603
class YoutubeSearchBaseInfoExtractor(YoutubePlaylistBaseInfoExtractor):
    """Shared base of the search extractors; only sets the video link regex."""

    _VIDEO_RE = (
        # A link to a watch page, capturing the 11-character video id ...
        r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})'
        # ... optionally followed, inside the same tag, by a title attribute.
        r'(?:[^"]*"[^>]+\btitle="(?P<title>[^"]+))?'
    )
2606
2607
class YoutubeSearchIE(SearchInfoExtractor, YoutubeSearchBaseInfoExtractor):
    """Extractor for the ``ytsearch`` keyword (relevance-ordered search)."""

    IE_DESC = 'YouTube.com searches'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    # Extra query-string arguments merged into every search request.
    _EXTRA_QUERY_ARGS = {}
    _TESTS = []

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query.

        Result pages are fetched one after another, following the "Next"
        link found in each page, until ``n`` videos have been collected, a
        page yields no videos, or no "Next" link is found.

        Returns a playlist result titled/identified by ``query`` holding at
        most ``n`` entries.

        Raises ExtractorError (expected) when a page contains a
        ``search-message`` element.
        """

        videos = []

        url_query = {
            'search_query': query.encode('utf-8'),
        }
        url_query.update(self._EXTRA_QUERY_ARGS)
        result_url = 'https://www.youtube.com/results?' + compat_urllib_parse_urlencode(url_query)

        for pagenum in itertools.count(1):
            data = self._download_json(
                result_url, video_id='query "%s"' % query,
                note='Downloading page %s' % pagenum,
                errnote='Unable to download API page',
                query={'spf': 'navigate'})
            html_content = data[1]['body']['content']

            if 'class="search-message' in html_content:
                raise ExtractorError(
                    '[youtube] No video results', expected=True)

            new_videos = list(self._process_page(html_content))
            videos += new_videos
            # Stop as soon as enough videos are collected: with ">" an extra
            # page was downloaded whenever exactly n results were in hand.
            if not new_videos or len(videos) >= n:
                break
            next_link = self._html_search_regex(
                r'href="(/results\?[^"]*\bsp=[^"]+)"[^>]*>\s*<span[^>]+class="[^"]*\byt-uix-button-content\b[^"]*"[^>]*>Next',
                html_content, 'next link', default=None)
            if next_link is None:
                break
            result_url = compat_urlparse.urljoin('https://www.youtube.com/', next_link)

        # The comparison must guard the slice: n may be float('inf')
        # (_MAX_RESULTS), which is not a valid slice index.
        if len(videos) > n:
            videos = videos[:n]
        return self.playlist_result(videos, query)
2656
2657
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Variant of YoutubeSearchIE for the ``ytsearchdate`` keyword; it adds
    a sort argument to the search query."""

    IE_DESC = 'YouTube.com searches, newest videos first'
    IE_NAME = '%s:date' % YoutubeSearchIE.IE_NAME
    _SEARCH_KEY = 'ytsearchdate'
    _EXTRA_QUERY_ARGS = {'search_sort': 'video_date_uploaded'}
2663
2664
class YoutubeSearchURLIE(YoutubeSearchBaseInfoExtractor):
    """Extractor for ``/results?...`` search URLs."""

    IE_NAME = 'youtube:search_url'
    IE_DESC = 'YouTube.com search URLs'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)'
    _TESTS = [
        {
            'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
            'playlist_mincount': 5,
            'info_dict': {
                'title': 'youtube-dl test video',
            },
        },
        {
            'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Download the search page at ``url`` and return the videos found
        on it as a playlist titled with the decoded query string."""
        raw_query = re.match(self._VALID_URL, url).group('query')
        query = compat_urllib_parse_unquote_plus(raw_query)
        results_page = self._download_webpage(url, query)
        entries = self._process_page(results_page)
        return self.playlist_result(entries, playlist_title=query)
2685
2686
class YoutubeShowIE(YoutubePlaylistsBaseInfoExtractor):
    """Extractor for ``/show/<id>`` URLs."""

    IE_NAME = 'youtube:show'
    IE_DESC = 'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/show/(?P<id>[^?#]*)'
    _TESTS = [
        {
            'url': 'https://www.youtube.com/show/airdisasters',
            'playlist_mincount': 5,
            'info_dict': {
                'id': 'airdisasters',
                'title': 'Air Disasters',
            },
        },
    ]

    def _real_extract(self, url):
        """Delegate to the parent extractor using the show's ``/playlists``
        page in place of the given URL."""
        playlists_url = 'https://www.youtube.com/show/%s/playlists' % self._match_id(url)
        return super(YoutubeShowIE, self)._real_extract(playlists_url)
2704
2705
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
    """
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        """Extractor name derived from the subclass's feed name."""
        return 'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        """Log in before any extraction is attempted."""
        self._login()

    def _entries(self, page):
        """Yield one result per distinct video id found in the feed.

        ``page`` is the HTML of the first feed page. Further portions are
        fetched through the "load more" link for as long as one is present
        and the latest portion contributed at least one unseen video id.
        """
        # The extraction process is the same as for playlists, but the regex
        # for the video ids doesn't contain an index
        # A set keeps the "already seen" test O(1) however long the feed gets.
        seen_ids = set()
        more_widget_html = content_html = page
        for page_num in itertools.count(1):
            matches = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)

            # 'recommended' feed has infinite 'load more' and each new portion spins
            # the same videos in (sometimes) slightly different order, so we'll check
            # for unicity and break when portion has no new videos
            new_ids = [
                video_id for video_id in orderedSet(matches)
                if video_id not in seen_ids]
            if not new_ids:
                break

            seen_ids.update(new_ids)

            for entry in self._ids_to_results(new_ids):
                yield entry

            mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
            if not mobj:
                break

            more = self._download_json(
                'https://youtube.com/%s' % mobj.group('more'), self._PLAYLIST_TITLE,
                'Downloading page #%s' % page_num,
                transform_source=uppercase_escape)
            content_html = more['content_html']
            more_widget_html = more['load_more_widget_html']

    def _real_extract(self, url):
        """Download the feed page for _FEED_NAME and return its videos as a
        playlist titled _PLAYLIST_TITLE. ``url`` itself is not used."""
        page = self._download_webpage(
            'https://www.youtube.com/feed/%s' % self._FEED_NAME,
            self._PLAYLIST_TITLE)
        return self.playlist_result(
            self._entries(page), playlist_title=self._PLAYLIST_TITLE)
2757
2758
class YoutubeWatchLaterIE(YoutubePlaylistIE):
    """Extractor for the "watch later" list (playlist id ``WL``)."""

    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    IE_NAME = 'youtube:watchlater'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:feed/watch_later|(?:playlist|watch)\?(?:.+&)?list=WL)|:ytwatchlater'

    _TESTS = [
        {
            'url': 'https://www.youtube.com/playlist?list=WL',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?v=bCNU9TrbiRk&index=1&list=WL',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Return the single-video result if the inherited check produces
        one for ``url``; otherwise return the extracted ``WL`` playlist."""
        _ignored, single_video = self._check_download_just_video(url, 'WL')
        if not single_video:
            _ignored, whole_playlist = self._extract_playlist('WL')
            return whole_playlist
        return single_video
2778
2779
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Extractor for the logged-in user's favourites list."""

    IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
    IE_NAME = 'youtube:favorites'
    _LOGIN_REQUIRED = True
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/my_favorites|:ytfav(?:ou?rites)?'

    def _real_extract(self, url):
        """Fetch the ``my_favorites`` page, read the playlist id from the
        first ``list=`` parameter in it and hand that id to the playlist
        extractor. ``url`` itself is not used."""
        favourites_page = self._download_webpage(
            'https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        return self.url_result(
            self._search_regex(
                r'list=(.+?)["&]', favourites_page, 'favourites playlist id'),
            'YoutubePlaylist')
2790
2791
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor configured for the ``recommended`` feed."""

    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = 'Youtube Recommended videos'
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/recommended|:ytrec(?:ommended)?'
2797
2798
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor configured for the ``subscriptions`` feed."""

    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = 'Youtube Subscriptions'
    IE_DESC = 'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
2804
2805
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor configured for the ``history`` feed."""

    _FEED_NAME = 'history'
    _PLAYLIST_TITLE = 'Youtube History'
    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/history|:ythistory'
2811
2812
class YoutubeTruncatedURLIE(InfoExtractor):
    """Matches watch/attribution URLs that end before any video id and
    answers them with a hint about quoting the URL."""

    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [
        {
            'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?feature=foo',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?hl=en-GB',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?t=2372',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError explaining that the URL
        probably lost everything after an unquoted ``&``."""
        hint = (
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like  youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply  youtube-dl BaW_jenozKc  .')
        raise ExtractorError(hint, expected=True)
2860
2861
class YoutubeTruncatedIDIE(InfoExtractor):
    """Matches watch URLs whose ``v`` value is 1 to 10 characters long,
    i.e. shorter than an 11-character video id."""

    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [
        {
            'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the short id and
        the URL it came from."""
        partial_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (partial_id, url)
        raise ExtractorError(message, expected=True)