_ Git - youtube-dl/blob - youtube_dl/extractor/youtube.py

   1 # coding: utf-8
   2
   3 from __future__ import unicode_literals
   4
   5
   6 import itertools
   7 import json
   8 import os.path
   9 import random
  10 import re
  11 import time
  12 import traceback
  13
  14 from .common import InfoExtractor, SearchInfoExtractor
  15 from ..jsinterp import JSInterpreter
  16 from ..swfinterp import SWFInterpreter
  17 from ..compat import (
  18     compat_chr,
  19     compat_kwargs,
  20     compat_parse_qs,
  21     compat_urllib_parse_unquote,
  22     compat_urllib_parse_unquote_plus,
  23     compat_urllib_parse_urlencode,
  24     compat_urllib_parse_urlparse,
  25     compat_urlparse,
  26     compat_str,
  27 )
  28 from ..utils import (
  29     clean_html,
  30     error_to_compat_str,
  31     ExtractorError,
  32     float_or_none,
  33     get_element_by_attribute,
  34     get_element_by_id,
  35     int_or_none,
  36     mimetype2ext,
  37     orderedSet,
  38     parse_codecs,
  39     parse_duration,
  40     qualities,
  41     remove_quotes,
  42     remove_start,
  43     smuggle_url,
  44     str_to_int,
  45     try_get,
  46     unescapeHTML,
  47     unified_strdate,
  48     unsmuggle_url,
  49     uppercase_escape,
  50     urlencode_postdata,
  51 )
  52
  53
  54 class YoutubeBaseInfoExtractor(InfoExtractor):
  55     """Provide base functions for Youtube extractors"""
  56     _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
  57     _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'
  58
  59     _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
  60     _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
  61     _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'
  62
  63     _NETRC_MACHINE = 'youtube'
  64     # If True it will raise an error if no login info is provided
  65     _LOGIN_REQUIRED = False
  66
  67     _PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL|OLAK5uy_)[0-9A-Za-z-_]{10,}'
  68
  69     def _set_language(self):
  70         self._set_cookie(
  71             '.youtube.com', 'PREF', 'f1=50000000&hl=en',
  72             # YouTube sets the expire time to about two months
  73             expire_time=time.time() + 2 * 30 * 24 * 3600)
  74
  75     def _ids_to_results(self, ids):
  76         return [
  77             self.url_result(vid_id, 'Youtube', video_id=vid_id)
  78             for vid_id in ids]
  79
  80     def _login(self):
  81         """
  82         Attempt to log in to YouTube.
  83         True is returned if successful or skipped.
  84         False is returned if login failed.
  85
  86         If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
  87         """
  88         username, password = self._get_login_info()
  89         # No authentication to be performed
  90         if username is None:
  91             if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
  92                 raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
  93             return True
  94
  95         login_page = self._download_webpage(
  96             self._LOGIN_URL, None,
  97             note='Downloading login page',
  98             errnote='unable to fetch login page', fatal=False)
  99         if login_page is False:
 100             return
 101
 102         login_form = self._hidden_inputs(login_page)
 103
 104         def req(url, f_req, note, errnote):
 105             data = login_form.copy()
 106             data.update({
 107                 'pstMsg': 1,
 108                 'checkConnection': 'youtube',
 109                 'checkedDomains': 'youtube',
 110                 'hl': 'en',
 111                 'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
 112                 'f.req': json.dumps(f_req),
 113                 'flowName': 'GlifWebSignIn',
 114                 'flowEntry': 'ServiceLogin',
 115             })
 116             return self._download_json(
 117                 url, None, note=note, errnote=errnote,
 118                 transform_source=lambda s: re.sub(r'^[^[]*', '', s),
 119                 fatal=False,
 120                 data=urlencode_postdata(data), headers={
 121                     'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
 122                     'Google-Accounts-XSRF': 1,
 123                 })
 124
 125         def warn(message):
 126             self._downloader.report_warning(message)
 127
 128         lookup_req = [
 129             username,
 130             None, [], None, 'US', None, None, 2, False, True,
 131             [
 132                 None, None,
 133                 [2, 1, None, 1,
 134                  'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
 135                  None, [], 4],
 136                 1, [None, None, []], None, None, None, True
 137             ],
 138             username,
 139         ]
 140
 141         lookup_results = req(
 142             self._LOOKUP_URL, lookup_req,
 143             'Looking up account info', 'Unable to look up account info')
 144
 145         if lookup_results is False:
 146             return False
 147
 148         user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
 149         if not user_hash:
 150             warn('Unable to extract user hash')
 151             return False
 152
 153         challenge_req = [
 154             user_hash,
 155             None, 1, None, [1, None, None, None, [password, None, True]],
 156             [
 157                 None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
 158                 1, [None, None, []], None, None, None, True
 159             ]]
 160
 161         challenge_results = req(
 162             self._CHALLENGE_URL, challenge_req,
 163             'Logging in', 'Unable to log in')
 164
 165         if challenge_results is False:
 166             return
 167
 168         login_res = try_get(challenge_results, lambda x: x[0][5], list)
 169         if login_res:
 170             login_msg = try_get(login_res, lambda x: x[5], compat_str)
 171             warn(
 172                 'Unable to login: %s' % 'Invalid password'
 173                 if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
 174             return False
 175
 176         res = try_get(challenge_results, lambda x: x[0][-1], list)
 177         if not res:
 178             warn('Unable to extract result entry')
 179             return False
 180
 181         login_challenge = try_get(res, lambda x: x[0][0], list)
 182         if login_challenge:
 183             challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
 184             if challenge_str == 'TWO_STEP_VERIFICATION':
 185                 # SEND_SUCCESS - TFA code has been successfully sent to phone
 186                 # QUOTA_EXCEEDED - reached the limit of TFA codes
 187                 status = try_get(login_challenge, lambda x: x[5], compat_str)
 188                 if status == 'QUOTA_EXCEEDED':
 189                     warn('Exceeded the limit of TFA codes, try later')
 190                     return False
 191
 192                 tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
 193                 if not tl:
 194                     warn('Unable to extract TL')
 195                     return False
 196
 197                 tfa_code = self._get_tfa_info('2-step verification code')
 198
 199                 if not tfa_code:
 200                     warn(
 201                         'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
 202                         '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
 203                     return False
 204
 205                 tfa_code = remove_start(tfa_code, 'G-')
 206
 207                 tfa_req = [
 208                     user_hash, None, 2, None,
 209                     [
 210                         9, None, None, None, None, None, None, None,
 211                         [None, tfa_code, True, 2]
 212                     ]]
 213
 214                 tfa_results = req(
 215                     self._TFA_URL.format(tl), tfa_req,
 216                     'Submitting TFA code', 'Unable to submit TFA code')
 217
 218                 if tfa_results is False:
 219                     return False
 220
 221                 tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
 222                 if tfa_res:
 223                     tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
 224                     warn(
 225                         'Unable to finish TFA: %s' % 'Invalid TFA code'
 226                         if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
 227                     return False
 228
 229                 check_cookie_url = try_get(
 230                     tfa_results, lambda x: x[0][-1][2], compat_str)
 231             else:
 232                 CHALLENGES = {
 233                     'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
 234                     'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
 235                     'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
 236                 }
 237                 challenge = CHALLENGES.get(
 238                     challenge_str,
 239                     '%s returned error %s.' % (self.IE_NAME, challenge_str))
 240                 warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
 241                 return False
 242         else:
 243             check_cookie_url = try_get(res, lambda x: x[2], compat_str)
 244
 245         if not check_cookie_url:
 246             warn('Unable to extract CheckCookie URL')
 247             return False
 248
 249         check_cookie_results = self._download_webpage(
 250             check_cookie_url, None, 'Checking cookie', fatal=False)
 251
 252         if check_cookie_results is False:
 253             return False
 254
 255         if 'https://myaccount.google.com/' not in check_cookie_results:
 256             warn('Unable to log in')
 257             return False
 258
 259         return True
 260
 261     def _download_webpage_handle(self, *args, **kwargs):
 262         query = kwargs.get('query', {}).copy()
 263         query['disable_polymer'] = 'true'
 264         kwargs['query'] = query
 265         return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
 266             *args, **compat_kwargs(kwargs))
 267
 268     def _real_initialize(self):
 269         if self._downloader is None:
 270             return
 271         self._set_language()
 272         if not self._login():
 273             return
 274
 275
 276 class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
 277     # Extract entries from page with "Load more" button
 278     def _entries(self, page, playlist_id):
 279         more_widget_html = content_html = page
 280         for page_num in itertools.count(1):
 281             for entry in self._process_page(content_html):
 282                 yield entry
 283
 284             mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
 285             if not mobj:
 286                 break
 287
 288             more = self._download_json(
 289                 'https://youtube.com/%s' % mobj.group('more'), playlist_id,
 290                 'Downloading page #%s' % page_num,
 291                 transform_source=uppercase_escape)
 292             content_html = more['content_html']
 293             if not content_html.strip():
 294                 # Some webpages show a "Load more" button but they don't
 295                 # have more videos
 296                 break
 297             more_widget_html = more['load_more_widget_html']
 298
 299
 300 class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
 301     def _process_page(self, content):
 302         for video_id, video_title in self.extract_videos_from_page(content):
 303             yield self.url_result(video_id, 'Youtube', video_id, video_title)
 304
 305     def extract_videos_from_page(self, page):
 306         ids_in_page = []
 307         titles_in_page = []
 308         for mobj in re.finditer(self._VIDEO_RE, page):
 309             # The link with index 0 is not the first video of the playlist (not sure if still actual)
 310             if 'index' in mobj.groupdict() and mobj.group('id') == '0':
 311                 continue
 312             video_id = mobj.group('id')
 313             video_title = unescapeHTML(mobj.group('title'))
 314             if video_title:
 315                 video_title = video_title.strip()
 316             try:
 317                 idx = ids_in_page.index(video_id)
 318                 if video_title and not titles_in_page[idx]:
 319                     titles_in_page[idx] = video_title
 320             except ValueError:
 321                 ids_in_page.append(video_id)
 322                 titles_in_page.append(video_title)
 323         return zip(ids_in_page, titles_in_page)
 324
 325
 326 class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
 327     def _process_page(self, content):
 328         for playlist_id in orderedSet(re.findall(
 329                 r'<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*><a[^>]+href="/?playlist\?list=([0-9A-Za-z-_]{10,})"',
 330                 content)):
 331             yield self.url_result(
 332                 'https://www.youtube.com/playlist?list=%s' % playlist_id, 'YoutubePlaylist')
 333
 334     def _real_extract(self, url):
 335         playlist_id = self._match_id(url)
 336         webpage = self._download_webpage(url, playlist_id)
 337         title = self._og_search_title(webpage, fatal=False)
 338         return self.playlist_result(self._entries(webpage, playlist_id), playlist_id, title)
 339
 340
 341 class YoutubeIE(YoutubeBaseInfoExtractor):
 342     IE_DESC = 'YouTube.com'
 343     _VALID_URL = r"""(?x)^
 344                      (
 345                          (?:https?://|//)                                    # http(s):// or protocol-independent URL
 346                          (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|
 347                             (?:www\.)?deturl\.com/www\.youtube\.com/|
 348                             (?:www\.)?pwnyoutube\.com/|
 349                             (?:www\.)?hooktube\.com/|
 350                             (?:www\.)?yourepeat\.com/|
 351                             tube\.majestyc\.net/|
 352                             youtube\.googleapis\.com/)                        # the various hostnames, with wildcard subdomains
 353                          (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
 354                          (?:                                                  # the various things that can precede the ID:
 355                              (?:(?:v|embed|e)/(?!videoseries))                # v/ or embed/ or e/
 356                              |(?:                                             # or the v= param in all its forms
 357                                  (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)?  # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
 358                                  (?:\?|\#!?)                                  # the params delimiter ? or # or #!
 359                                  (?:.*?[&;])??                                # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
 360                                  v=
 361                              )
 362                          ))
 363                          |(?:
 364                             youtu\.be|                                        # just youtu.be/xxxx
 365                             vid\.plus|                                        # or vid.plus/xxxx
 366                             zwearz\.com/watch|                                # or zwearz.com/watch/xxxx
 367                          )/
 368                          |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
 369                          )
 370                      )?                                                       # all until now is optional -> you can pass the naked ID
 371                      ([0-9A-Za-z_-]{11})                                      # here is it! the YouTube video ID
 372                      (?!.*?\blist=
 373                         (?:
 374                             %(playlist_id)s|                                  # combined list/video URLs are handled by the playlist IE
 375                             WL                                                # WL are handled by the watch later IE
 376                         )
 377                      )
 378                      (?(1).+)?                                                # if we found the ID, everything can follow
 379                      $""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
 380     _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
 381     _formats = {
 382         '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
 383         '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
 384         '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
 385         '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
 386         '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
 387         '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
 388         '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 389         '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 390         # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
 391         '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
 392         '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
 393         '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
 394         '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
 395         '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
 396         '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
 397         '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
 398         '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 399         '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 400
 401
 402         # 3D videos
 403         '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
 404         '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
 405         '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
 406         '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
 407         '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
 408         '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
 409         '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
 410
 411         # Apple HTTP Live Streaming
 412         '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
 413         '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
 414         '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
 415         '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
 416         '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
 417         '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
 418         '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
 419         '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
 420
 421         # DASH mp4 video
 422         '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
 423         '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
 424         '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
 425         '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
 426         '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
 427         '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'},  # Height can vary (https://github.com/rg3/youtube-dl/issues/4559)
 428         '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
 429         '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
 430         '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
 431         '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
 432         '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
 433         '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
 434
 435         # Dash mp4 audio
 436         '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
 437         '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
 438         '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
 439         '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
 440         '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
 441         '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
 442         '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
 443
 444         # Dash webm
 445         '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 446         '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 447         '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 448         '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 449         '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 450         '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 451         '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
 452         '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 453         '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 454         '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 455         '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 456         '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 457         '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 458         '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 459         '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 460         # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
 461         '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 462         '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
 463         '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
 464         '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
 465         '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 466         '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
 467
 468         # Dash webm audio
 469         '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
 470         '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
 471
 472         # Dash webm audio with opus inside
 473         '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
 474         '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
 475         '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
 476
 477         # RTMP (unnamed)
 478         '_rtmp': {'protocol': 'rtmp'},
 479     }
 480     _SUBTITLE_FORMATS = ('ttml', 'vtt')
 481
 482     _GEO_BYPASS = False
 483
 484     IE_NAME = 'youtube'
 485     _TESTS = [
 486         {
 487             'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
 488             'info_dict': {
 489                 'id': 'BaW_jenozKc',
 490                 'ext': 'mp4',
 491                 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
 492                 'uploader': 'Philipp Hagemeister',
 493                 'uploader_id': 'phihag',
 494                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
 495                 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
 496                 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
 497                 'upload_date': '20121002',
 498                 'license': 'Standard YouTube License',
 499                 'description': 'test chars:  "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
 500                 'categories': ['Science & Technology'],
 501                 'tags': ['youtube-dl'],
 502                 'duration': 10,
 503                 'like_count': int,
 504                 'dislike_count': int,
 505                 'start_time': 1,
 506                 'end_time': 9,
 507             }
 508         },
 509         {
 510             'url': 'https://www.youtube.com/watch?v=UxxajLWwzqY',
 511             'note': 'Test generic use_cipher_signature video (#897)',
 512             'info_dict': {
 513                 'id': 'UxxajLWwzqY',
 514                 'ext': 'mp4',
 515                 'upload_date': '20120506',
 516                 'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
 517                 'alt_title': 'I Love It (feat. Charli XCX)',
 518                 'description': 'md5:f3ceb5ef83a08d95b9d146f973157cc8',
 519                 'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
 520                          'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
 521                          'iconic ep', 'iconic', 'love', 'it'],
 522                 'duration': 180,
 523                 'uploader': 'Icona Pop',
 524                 'uploader_id': 'IconaPop',
 525                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IconaPop',
 526                 'license': 'Standard YouTube License',
 527                 'creator': 'Icona Pop',
 528                 'track': 'I Love It (feat. Charli XCX)',
 529                 'artist': 'Icona Pop',
 530             }
 531         },
 532         {
 533             'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ',
 534             'note': 'Test VEVO video with age protection (#956)',
 535             'info_dict': {
 536                 'id': '07FYdnEawAQ',
 537                 'ext': 'mp4',
 538                 'upload_date': '20130703',
 539                 'title': 'Justin Timberlake - Tunnel Vision (Explicit)',
 540                 'alt_title': 'Tunnel Vision',
 541                 'description': 'md5:64249768eec3bc4276236606ea996373',
 542                 'duration': 419,
 543                 'uploader': 'justintimberlakeVEVO',
 544                 'uploader_id': 'justintimberlakeVEVO',
 545                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/justintimberlakeVEVO',
 546                 'license': 'Standard YouTube License',
 547                 'creator': 'Justin Timberlake',
 548                 'track': 'Tunnel Vision',
 549                 'artist': 'Justin Timberlake',
 550                 'age_limit': 18,
 551             }
 552         },
 553         {
 554             'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
 555             'note': 'Embed-only video (#1746)',
 556             'info_dict': {
 557                 'id': 'yZIXLfi8CZQ',
 558                 'ext': 'mp4',
 559                 'upload_date': '20120608',
 560                 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
 561                 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
 562                 'uploader': 'SET India',
 563                 'uploader_id': 'setindia',
 564                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
 565                 'license': 'Standard YouTube License',
 566                 'age_limit': 18,
 567             }
 568         },
 569         {
 570             'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=UxxajLWwzqY',
 571             'note': 'Use the first video ID in the URL',
 572             'info_dict': {
 573                 'id': 'BaW_jenozKc',
 574                 'ext': 'mp4',
 575                 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
 576                 'uploader': 'Philipp Hagemeister',
 577                 'uploader_id': 'phihag',
 578                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
 579                 'upload_date': '20121002',
 580                 'license': 'Standard YouTube License',
 581                 'description': 'test chars:  "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
 582                 'categories': ['Science & Technology'],
 583                 'tags': ['youtube-dl'],
 584                 'duration': 10,
 585                 'like_count': int,
 586                 'dislike_count': int,
 587             },
 588             'params': {
 589                 'skip_download': True,
 590             },
 591         },
 592         {
 593             'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
 594             'note': '256k DASH audio (format 141) via DASH manifest',
 595             'info_dict': {
 596                 'id': 'a9LDPn-MO4I',
 597                 'ext': 'm4a',
 598                 'upload_date': '20121002',
 599                 'uploader_id': '8KVIDEO',
 600                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
 601                 'description': '',
 602                 'uploader': '8KVIDEO',
 603                 'license': 'Standard YouTube License',
 604                 'title': 'UHDTV TEST 8K VIDEO.mp4'
 605             },
 606             'params': {
 607                 'youtube_include_dash_manifest': True,
 608                 'format': '141',
 609             },
 610             'skip': 'format 141 not served anymore',
 611         },
 612         # DASH manifest with encrypted signature
 613         {
 614             'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
 615             'info_dict': {
 616                 'id': 'IB3lcPjvWLA',
 617                 'ext': 'm4a',
 618                 'title': 'Afrojack, Spree Wilson - The Spark ft. Spree Wilson',
 619                 'description': 'md5:1900ed86ee514927b9e00fbead6969a5',
 620                 'duration': 244,
 621                 'uploader': 'AfrojackVEVO',
 622                 'uploader_id': 'AfrojackVEVO',
 623                 'upload_date': '20131011',
 624                 'license': 'Standard YouTube License',
 625             },
 626             'params': {
 627                 'youtube_include_dash_manifest': True,
 628                 'format': '141/bestaudio[ext=m4a]',
 629             },
 630         },
 631         # JS player signature function name containing $
 632         {
 633             'url': 'https://www.youtube.com/watch?v=nfWlot6h_JM',
 634             'info_dict': {
 635                 'id': 'nfWlot6h_JM',
 636                 'ext': 'm4a',
 637                 'title': 'Taylor Swift - Shake It Off',
 638                 'alt_title': 'Shake It Off',
 639                 'description': 'md5:95f66187cd7c8b2c13eb78e1223b63c3',
 640                 'duration': 242,
 641                 'uploader': 'TaylorSwiftVEVO',
 642                 'uploader_id': 'TaylorSwiftVEVO',
 643                 'upload_date': '20140818',
 644                 'license': 'Standard YouTube License',
 645                 'creator': 'Taylor Swift',
 646             },
 647             'params': {
 648                 'youtube_include_dash_manifest': True,
 649                 'format': '141/bestaudio[ext=m4a]',
 650             },
 651         },
 652         # Controversy video
 653         {
 654             'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
 655             'info_dict': {
 656                 'id': 'T4XJQO3qol8',
 657                 'ext': 'mp4',
 658                 'duration': 219,
 659                 'upload_date': '20100909',
 660                 'uploader': 'TJ Kirk',
 661                 'uploader_id': 'TheAmazingAtheist',
 662                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
 663                 'license': 'Standard YouTube License',
 664                 'title': 'Burning Everyone\'s Koran',
 665                 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
 666             }
 667         },
 668         # Normal age-gate video (No vevo, embed allowed)
 669         {
 670             'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
 671             'info_dict': {
 672                 'id': 'HtVdAasjOgU',
 673                 'ext': 'mp4',
 674                 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
 675                 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
 676                 'duration': 142,
 677                 'uploader': 'The Witcher',
 678                 'uploader_id': 'WitcherGame',
 679                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
 680                 'upload_date': '20140605',
 681                 'license': 'Standard YouTube License',
 682                 'age_limit': 18,
 683             },
 684         },
 685         # Age-gate video with encrypted signature
 686         {
 687             'url': 'https://www.youtube.com/watch?v=6kLq3WMV1nU',
 688             'info_dict': {
 689                 'id': '6kLq3WMV1nU',
 690                 'ext': 'webm',
 691                 'title': 'Dedication To My Ex (Miss That) (Lyric Video)',
 692                 'description': 'md5:33765bb339e1b47e7e72b5490139bb41',
 693                 'duration': 246,
 694                 'uploader': 'LloydVEVO',
 695                 'uploader_id': 'LloydVEVO',
 696                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/LloydVEVO',
 697                 'upload_date': '20110629',
 698                 'license': 'Standard YouTube License',
 699                 'age_limit': 18,
 700             },
 701         },
 702         # video_info is None (https://github.com/rg3/youtube-dl/issues/4421)
 703         # YouTube Red ad is not captured for creator
 704         {
 705             'url': '__2ABJjxzNo',
 706             'info_dict': {
 707                 'id': '__2ABJjxzNo',
 708                 'ext': 'mp4',
 709                 'duration': 266,
 710                 'upload_date': '20100430',
 711                 'uploader_id': 'deadmau5',
 712                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
 713                 'creator': 'deadmau5',
 714                 'description': 'md5:12c56784b8032162bb936a5f76d55360',
 715                 'uploader': 'deadmau5',
 716                 'license': 'Standard YouTube License',
 717                 'title': 'Deadmau5 - Some Chords (HD)',
 718                 'alt_title': 'Some Chords',
 719             },
 720             'expected_warnings': [
 721                 'DASH manifest missing',
 722             ]
 723         },
 724         # Olympics (https://github.com/rg3/youtube-dl/issues/4431)
 725         {
 726             'url': 'lqQg6PlCWgI',
 727             'info_dict': {
 728                 'id': 'lqQg6PlCWgI',
 729                 'ext': 'mp4',
 730                 'duration': 6085,
 731                 'upload_date': '20150827',
 732                 'uploader_id': 'olympic',
 733                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
 734                 'license': 'Standard YouTube License',
 735                 'description': 'HO09  - Women -  GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
 736                 'uploader': 'Olympic',
 737                 'title': 'Hockey - Women -  GER-AUS - London 2012 Olympic Games',
 738             },
 739             'params': {
 740                 'skip_download': 'requires avconv',
 741             }
 742         },
 743         # Non-square pixels
 744         {
 745             'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
 746             'info_dict': {
 747                 'id': '_b-2C3KPAM0',
 748                 'ext': 'mp4',
 749                 'stretched_ratio': 16 / 9.,
 750                 'duration': 85,
 751                 'upload_date': '20110310',
 752                 'uploader_id': 'AllenMeow',
 753                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
 754                 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
 755                 'uploader': '孫ᄋᄅ',
 756                 'license': 'Standard YouTube License',
 757                 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
 758             },
 759         },
 760         # url_encoded_fmt_stream_map is empty string
 761         {
 762             'url': 'qEJwOuvDf7I',
 763             'info_dict': {
 764                 'id': 'qEJwOuvDf7I',
 765                 'ext': 'webm',
 766                 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
 767                 'description': '',
 768                 'upload_date': '20150404',
 769                 'uploader_id': 'spbelect',
 770                 'uploader': 'Наблюдатели Петербурга',
 771             },
 772             'params': {
 773                 'skip_download': 'requires avconv',
 774             },
 775             'skip': 'This live event has ended.',
 776         },
 777         # Extraction from multiple DASH manifests (https://github.com/rg3/youtube-dl/pull/6097)
 778         {
 779             'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
 780             'info_dict': {
 781                 'id': 'FIl7x6_3R5Y',
 782                 'ext': 'webm',
 783                 'title': 'md5:7b81415841e02ecd4313668cde88737a',
 784                 'description': 'md5:116377fd2963b81ec4ce64b542173306',
 785                 'duration': 220,
 786                 'upload_date': '20150625',
 787                 'uploader_id': 'dorappi2000',
 788                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
 789                 'uploader': 'dorappi2000',
 790                 'license': 'Standard YouTube License',
 791                 'formats': 'mincount:31',
 792             },
 793             'skip': 'not actual anymore',
 794         },
 795         # DASH manifest with segment_list
 796         {
 797             'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
 798             'md5': '8ce563a1d667b599d21064e982ab9e31',
 799             'info_dict': {
 800                 'id': 'CsmdDsKjzN8',
 801                 'ext': 'mp4',
 802                 'upload_date': '20150501',  # According to '<meta itemprop="datePublished"', but in other places it's 20150510
 803                 'uploader': 'Airtek',
 804                 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
 805                 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
 806                 'license': 'Standard YouTube License',
 807                 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
 808             },
 809             'params': {
 810                 'youtube_include_dash_manifest': True,
 811                 'format': '135',  # bestvideo
 812             },
 813             'skip': 'This live event has ended.',
 814         },
 815         {
 816             # Multifeed videos (multiple cameras), URL is for Main Camera
 817             'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
 818             'info_dict': {
 819                 'id': 'jqWvoWXjCVs',
 820                 'title': 'teamPGP: Rocket League Noob Stream',
 821                 'description': 'md5:dc7872fb300e143831327f1bae3af010',
 822             },
 823             'playlist': [{
 824                 'info_dict': {
 825                     'id': 'jqWvoWXjCVs',
 826                     'ext': 'mp4',
 827                     'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
 828                     'description': 'md5:dc7872fb300e143831327f1bae3af010',
 829                     'duration': 7335,
 830                     'upload_date': '20150721',
 831                     'uploader': 'Beer Games Beer',
 832                     'uploader_id': 'beergamesbeer',
 833                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
 834                     'license': 'Standard YouTube License',
 835                 },
 836             }, {
 837                 'info_dict': {
 838                     'id': '6h8e8xoXJzg',
 839                     'ext': 'mp4',
 840                     'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
 841                     'description': 'md5:dc7872fb300e143831327f1bae3af010',
 842                     'duration': 7337,
 843                     'upload_date': '20150721',
 844                     'uploader': 'Beer Games Beer',
 845                     'uploader_id': 'beergamesbeer',
 846                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
 847                     'license': 'Standard YouTube License',
 848                 },
 849             }, {
 850                 'info_dict': {
 851                     'id': 'PUOgX5z9xZw',
 852                     'ext': 'mp4',
 853                     'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
 854                     'description': 'md5:dc7872fb300e143831327f1bae3af010',
 855                     'duration': 7337,
 856                     'upload_date': '20150721',
 857                     'uploader': 'Beer Games Beer',
 858                     'uploader_id': 'beergamesbeer',
 859                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
 860                     'license': 'Standard YouTube License',
 861                 },
 862             }, {
 863                 'info_dict': {
 864                     'id': 'teuwxikvS5k',
 865                     'ext': 'mp4',
 866                     'title': 'teamPGP: Rocket League Noob Stream (zim)',
 867                     'description': 'md5:dc7872fb300e143831327f1bae3af010',
 868                     'duration': 7334,
 869                     'upload_date': '20150721',
 870                     'uploader': 'Beer Games Beer',
 871                     'uploader_id': 'beergamesbeer',
 872                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
 873                     'license': 'Standard YouTube License',
 874                 },
 875             }],
 876             'params': {
 877                 'skip_download': True,
 878             },
 879         },
 880         {
 881             # Multifeed video with comma in title (see https://github.com/rg3/youtube-dl/issues/8536)
 882             'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
 883             'info_dict': {
 884                 'id': 'gVfLd0zydlo',
 885                 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
 886             },
 887             'playlist_count': 2,
 888             'skip': 'Not multifeed anymore',
 889         },
 890         {
 891             'url': 'https://vid.plus/FlRa-iH7PGw',
 892             'only_matching': True,
 893         },
 894         {
 895             'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
 896             'only_matching': True,
 897         },
 898         {
 899             # Title with JS-like syntax "};" (see https://github.com/rg3/youtube-dl/issues/7468)
 900             # Also tests cut-off URL expansion in video description (see
 901             # https://github.com/rg3/youtube-dl/issues/1892,
 902             # https://github.com/rg3/youtube-dl/issues/8164)
 903             'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
 904             'info_dict': {
 905                 'id': 'lsguqyKfVQg',
 906                 'ext': 'mp4',
 907                 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
 908                 'alt_title': 'Dark Walk - Position Music',
 909                 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
 910                 'duration': 133,
 911                 'upload_date': '20151119',
 912                 'uploader_id': 'IronSoulElf',
 913                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
 914                 'uploader': 'IronSoulElf',
 915                 'license': 'Standard YouTube License',
 916                 'creator': 'Todd Haberman,  Daniel Law Heath and Aaron Kaplan',
 917                 'track': 'Dark Walk - Position Music',
 918                 'artist': 'Todd Haberman,  Daniel Law Heath and Aaron Kaplan',
 919             },
 920             'params': {
 921                 'skip_download': True,
 922             },
 923         },
 924         {
 925             # Tags with '};' (see https://github.com/rg3/youtube-dl/issues/7468)
 926             'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
 927             'only_matching': True,
 928         },
 929         {
 930             # Video with yt:stretch=17:0
 931             'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
 932             'info_dict': {
 933                 'id': 'Q39EVAstoRM',
 934                 'ext': 'mp4',
 935                 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
 936                 'description': 'md5:ee18a25c350637c8faff806845bddee9',
 937                 'upload_date': '20151107',
 938                 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
 939                 'uploader': 'CH GAMER DROID',
 940             },
 941             'params': {
 942                 'skip_download': True,
 943             },
 944             'skip': 'This video does not exist.',
 945         },
 946         {
 947             # Video licensed under Creative Commons
 948             'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
 949             'info_dict': {
 950                 'id': 'M4gD1WSo5mA',
 951                 'ext': 'mp4',
 952                 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
 953                 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
 954                 'duration': 721,
 955                 'upload_date': '20150127',
 956                 'uploader_id': 'BerkmanCenter',
 957                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
 958                 'uploader': 'The Berkman Klein Center for Internet & Society',
 959                 'license': 'Creative Commons Attribution license (reuse allowed)',
 960             },
 961             'params': {
 962                 'skip_download': True,
 963             },
 964         },
 965         {
 966             # Channel-like uploader_url
 967             'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
 968             'info_dict': {
 969                 'id': 'eQcmzGIKrzg',
 970                 'ext': 'mp4',
 971                 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
 972                 'description': 'md5:dda0d780d5a6e120758d1711d062a867',
 973                 'duration': 4060,
 974                 'upload_date': '20151119',
 975                 'uploader': 'Bernie Sanders',
 976                 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
 977                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
 978                 'license': 'Creative Commons Attribution license (reuse allowed)',
 979             },
 980             'params': {
 981                 'skip_download': True,
 982             },
 983         },
 984         {
 985             'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
 986             'only_matching': True,
 987         },
 988         {
 989             # YouTube Red paid video (https://github.com/rg3/youtube-dl/issues/10059)
 990             'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
 991             'only_matching': True,
 992         },
 993         {
 994             # Rental video preview
 995             'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
 996             'info_dict': {
 997                 'id': 'uGpuVWrhIzE',
 998                 'ext': 'mp4',
 999                 'title': 'Piku - Trailer',
1000                 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1001                 'upload_date': '20150811',
1002                 'uploader': 'FlixMatrix',
1003                 'uploader_id': 'FlixMatrixKaravan',
1004                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
1005                 'license': 'Standard YouTube License',
1006             },
1007             'params': {
1008                 'skip_download': True,
1009             },
1010             'skip': 'This video is not available.',
1011         },
1012         {
1013             # YouTube Red video with episode data
1014             'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1015             'info_dict': {
1016                 'id': 'iqKdEhx-dD4',
1017                 'ext': 'mp4',
1018                 'title': 'Isolation - Mind Field (Ep 1)',
1019                 'description': 'md5:25b78d2f64ae81719f5c96319889b736',
1020                 'duration': 2085,
1021                 'upload_date': '20170118',
1022                 'uploader': 'Vsauce',
1023                 'uploader_id': 'Vsauce',
1024                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
1025                 'license': 'Standard YouTube License',
1026                 'series': 'Mind Field',
1027                 'season_number': 1,
1028                 'episode_number': 1,
1029             },
1030             'params': {
1031                 'skip_download': True,
1032             },
1033             'expected_warnings': [
1034                 'Skipping DASH manifest',
1035             ],
1036         },
1037         {
1038             # The following content has been identified by the YouTube community
1039             # as inappropriate or offensive to some audiences.
1040             'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1041             'info_dict': {
1042                 'id': '6SJNVb0GnPI',
1043                 'ext': 'mp4',
1044                 'title': 'Race Differences in Intelligence',
1045                 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1046                 'duration': 965,
1047                 'upload_date': '20140124',
1048                 'uploader': 'New Century Foundation',
1049                 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1050                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1051                 'license': 'Standard YouTube License',
1052             },
1053             'params': {
1054                 'skip_download': True,
1055             },
1056         },
1057         {
1058             # itag 212
1059             'url': '1t24XAntNCY',
1060             'only_matching': True,
1061         },
1062         {
1063             # geo restricted to JP
1064             'url': 'sJL6WA-aGkQ',
1065             'only_matching': True,
1066         },
1067         {
1068             'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
1069             'only_matching': True,
1070         },
1071     ]
1072
1073     def __init__(self, *args, **kwargs):
1074         super(YoutubeIE, self).__init__(*args, **kwargs)
1075         self._player_cache = {}
1076
1077     def report_video_info_webpage_download(self, video_id):
1078         """Report attempt to download video info webpage."""
1079         self.to_screen('%s: Downloading video info webpage' % video_id)
1080
1081     def report_information_extraction(self, video_id):
1082         """Report attempt to extract video information."""
1083         self.to_screen('%s: Extracting video information' % video_id)
1084
1085     def report_unavailable_format(self, video_id, format):
1086         """Report extracted video URL."""
1087         self.to_screen('%s: Format %s not available' % (video_id, format))
1088
1089     def report_rtmp_download(self):
1090         """Indicate the download will use the RTMP protocol."""
1091         self.to_screen('RTMP download detected')
1092
1093     def _signature_cache_id(self, example_sig):
1094         """ Return a string representation of a signature """
1095         return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
1096
1097     def _extract_signature_function(self, video_id, player_url, example_sig):
1098         id_m = re.match(
1099             r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|(?:/[a-z]{2}_[A-Z]{2})?/base)?\.(?P<ext>[a-z]+)$',
1100             player_url)
1101         if not id_m:
1102             raise ExtractorError('Cannot identify player %r' % player_url)
1103         player_type = id_m.group('ext')
1104         player_id = id_m.group('id')
1105
1106         # Read from filesystem cache
1107         func_id = '%s_%s_%s' % (
1108             player_type, player_id, self._signature_cache_id(example_sig))
1109         assert os.path.basename(func_id) == func_id
1110
1111         cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
1112         if cache_spec is not None:
1113             return lambda s: ''.join(s[i] for i in cache_spec)
1114
1115         download_note = (
1116             'Downloading player %s' % player_url
1117             if self._downloader.params.get('verbose') else
1118             'Downloading %s player %s' % (player_type, player_id)
1119         )
1120         if player_type == 'js':
1121             code = self._download_webpage(
1122                 player_url, video_id,
1123                 note=download_note,
1124                 errnote='Download of %s failed' % player_url)
1125             res = self._parse_sig_js(code)
1126         elif player_type == 'swf':
1127             urlh = self._request_webpage(
1128                 player_url, video_id,
1129                 note=download_note,
1130                 errnote='Download of %s failed' % player_url)
1131             code = urlh.read()
1132             res = self._parse_sig_swf(code)
1133         else:
1134             assert False, 'Invalid player type %r' % player_type
1135
1136         test_string = ''.join(map(compat_chr, range(len(example_sig))))
1137         cache_res = res(test_string)
1138         cache_spec = [ord(c) for c in cache_res]
1139
1140         self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
1141         return res
1142
    def _print_sig_code(self, func, example_sig):
        """Print Python code equivalent to the signature function func.

        func is run on a probe string as long as example_sig, and the result
        is rendered as an 'if ...: return ...' snippet made of index and
        slice expressions on a string s. The snippet is written with
        to_screen; nothing is returned.
        """
        def gen_sig_code(idxs):
            """Yield 's[...]' expression strings covering the indices in idxs.

            Runs of neighbouring indices that differ by +1 or -1 are merged
            into a single slice expression; other indices are yielded as
            single-element lookups.
            """
            def _genslice(start, end, step):
                """Return the slice expression text for start..end inclusive."""
                # A start of 0 is left out of the slice text
                starts = '' if start == 0 else str(start)
                # end is inclusive, so the slice bound is end + step; a
                # negative bound is replaced by an open-ended ':'
                ends = (':%d' % (end + step)) if end + step >= 0 else ':'
                # A step of 1 is left out of the slice text
                steps = '' if step == 1 else (':%d' % step)
                return 's[%s%s%s]' % (starts, ends, steps)

            # step is None while no run is open, else the run's +1/-1 stride
            step = None
            # Squelch pyflakes warnings - start will be set when step is set
            start = '(Never used)'
            # Walk over consecutive (current, previous) index pairs
            for i, prev in zip(idxs[1:], idxs[:-1]):
                if step is not None:
                    if i - prev == step:
                        # Current index continues the open run
                        continue
                    # Run ended at prev; i is handled as prev of the next pair
                    yield _genslice(start, prev, step)
                    step = None
                    continue
                if i - prev in [-1, 1]:
                    # Open a new run starting at prev
                    step = i - prev
                    start = prev
                    continue
                else:
                    yield 's[%d]' % prev
            # Emit whatever is still pending after the last pair.
            # NOTE(review): with fewer than two indices the loop body never
            # runs and i is unbound here - confirm callers never hit that.
            if step is None:
                yield 's[%d]' % i
            else:
                yield _genslice(start, i, step)

        # Probe string: the character at position n has code point n, so the
        # code points of func's output are read back as index positions.
        test_string = ''.join(map(compat_chr, range(len(example_sig))))
        cache_res = func(test_string)
        cache_spec = [ord(c) for c in cache_res]
        expr_code = ' + '.join(gen_sig_code(cache_spec))
        # Lengths of the dot-separated parts, rendered as a tuple literal
        signature_id_tuple = '(%s)' % (
            ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
        code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
                '    return %s\n') % (signature_id_tuple, expr_code)
        self.to_screen('Extracted signature function:\n' + code)
1181
1182     def _parse_sig_js(self, jscode):
1183         funcname = self._search_regex(
1184             (r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1185              r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
1186              r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*c\s*&&\s*d\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1187              r'\bc\s*&&\s*d\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\('),
1188             jscode, 'Initial JS player signature function name', group='sig')
1189
1190         jsi = JSInterpreter(jscode)
1191         initial_function = jsi.extract_function(funcname)
1192         return lambda s: initial_function([s])
1193
1194     def _parse_sig_swf(self, file_contents):
1195         swfi = SWFInterpreter(file_contents)
1196         TARGET_CLASSNAME = 'SignatureDecipher'
1197         searched_class = swfi.extract_class(TARGET_CLASSNAME)
1198         initial_function = swfi.extract_function(searched_class, 'decipher')
1199         return lambda s: initial_function([s])
1200
1201     def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
1202         """Turn the encrypted s field into a working signature"""
1203
1204         if player_url is None:
1205             raise ExtractorError('Cannot decrypt signature without player_url')
1206
1207         if player_url.startswith('//'):
1208             player_url = 'https:' + player_url
1209         elif not re.match(r'https?://', player_url):
1210             player_url = compat_urlparse.urljoin(
1211                 'https://www.youtube.com', player_url)
1212         try:
1213             player_id = (player_url, self._signature_cache_id(s))
1214             if player_id not in self._player_cache:
1215                 func = self._extract_signature_function(
1216                     video_id, player_url, s
1217                 )
1218                 self._player_cache[player_id] = func
1219             func = self._player_cache[player_id]
1220             if self._downloader.params.get('youtube_print_sig_code'):
1221                 self._print_sig_code(func, s)
1222             return func(s)
1223         except Exception as e:
1224             tb = traceback.format_exc()
1225             raise ExtractorError(
1226                 'Signature extraction failed: ' + tb, cause=e)
1227
1228     def _get_subtitles(self, video_id, webpage):
1229         try:
1230             subs_doc = self._download_xml(
1231                 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
1232                 video_id, note=False)
1233         except ExtractorError as err:
1234             self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err))
1235             return {}
1236
1237         sub_lang_list = {}
1238         for track in subs_doc.findall('track'):
1239             lang = track.attrib['lang_code']
1240             if lang in sub_lang_list:
1241                 continue
1242             sub_formats = []
1243             for ext in self._SUBTITLE_FORMATS:
1244                 params = compat_urllib_parse_urlencode({
1245                     'lang': lang,
1246                     'v': video_id,
1247                     'fmt': ext,
1248                     'name': track.attrib['name'].encode('utf-8'),
1249                 })
1250                 sub_formats.append({
1251                     'url': 'https://www.youtube.com/api/timedtext?' + params,
1252                     'ext': ext,
1253                 })
1254             sub_lang_list[lang] = sub_formats
1255         if not sub_lang_list:
1256             self._downloader.report_warning('video doesn\'t have subtitles')
1257             return {}
1258         return sub_lang_list
1259
1260     def _get_ytplayer_config(self, video_id, webpage):
1261         patterns = (
1262             # User data may contain arbitrary character sequences that may affect
1263             # JSON extraction with regex, e.g. when '};' is contained the second
1264             # regex won't capture the whole JSON. Yet working around by trying more
1265             # concrete regex first keeping in mind proper quoted string handling
1266             # to be implemented in future that will replace this workaround (see
1267             # https://github.com/rg3/youtube-dl/issues/7468,
1268             # https://github.com/rg3/youtube-dl/pull/7599)
1269             r';ytplayer\.config\s*=\s*({.+?});ytplayer',
1270             r';ytplayer\.config\s*=\s*({.+?});',
1271         )
1272         config = self._search_regex(
1273             patterns, webpage, 'ytplayer.config', default=None)
1274         if config:
1275             return self._parse_json(
1276                 uppercase_escape(config), video_id, fatal=False)
1277
1278     def _get_automatic_captions(self, video_id, webpage):
1279         """We need the webpage for getting the captions url, pass it as an
1280            argument to speed up the process."""
1281         self.to_screen('%s: Looking for automatic captions' % video_id)
1282         player_config = self._get_ytplayer_config(video_id, webpage)
1283         err_msg = 'Couldn\'t find automatic captions for %s' % video_id
1284         if not player_config:
1285             self._downloader.report_warning(err_msg)
1286             return {}
1287         try:
1288             args = player_config['args']
1289             caption_url = args.get('ttsurl')
1290             if caption_url:
1291                 timestamp = args['timestamp']
1292                 # We get the available subtitles
1293                 list_params = compat_urllib_parse_urlencode({
1294                     'type': 'list',
1295                     'tlangs': 1,
1296                     'asrs': 1,
1297                 })
1298                 list_url = caption_url + '&' + list_params
1299                 caption_list = self._download_xml(list_url, video_id)
1300                 original_lang_node = caption_list.find('track')
1301                 if original_lang_node is None:
1302                     self._downloader.report_warning('Video doesn\'t have automatic captions')
1303                     return {}
1304                 original_lang = original_lang_node.attrib['lang_code']
1305                 caption_kind = original_lang_node.attrib.get('kind', '')
1306
1307                 sub_lang_list = {}
1308                 for lang_node in caption_list.findall('target'):
1309                     sub_lang = lang_node.attrib['lang_code']
1310                     sub_formats = []
1311                     for ext in self._SUBTITLE_FORMATS:
1312                         params = compat_urllib_parse_urlencode({
1313                             'lang': original_lang,
1314                             'tlang': sub_lang,
1315                             'fmt': ext,
1316                             'ts': timestamp,
1317                             'kind': caption_kind,
1318                         })
1319                         sub_formats.append({
1320                             'url': caption_url + '&' + params,
1321                             'ext': ext,
1322                         })
1323                     sub_lang_list[sub_lang] = sub_formats
1324                 return sub_lang_list
1325
1326             def make_captions(sub_url, sub_langs):
1327                 parsed_sub_url = compat_urllib_parse_urlparse(sub_url)
1328                 caption_qs = compat_parse_qs(parsed_sub_url.query)
1329                 captions = {}
1330                 for sub_lang in sub_langs:
1331                     sub_formats = []
1332                     for ext in self._SUBTITLE_FORMATS:
1333                         caption_qs.update({
1334                             'tlang': [sub_lang],
1335                             'fmt': [ext],
1336                         })
1337                         sub_url = compat_urlparse.urlunparse(parsed_sub_url._replace(
1338                             query=compat_urllib_parse_urlencode(caption_qs, True)))
1339                         sub_formats.append({
1340                             'url': sub_url,
1341                             'ext': ext,
1342                         })
1343                     captions[sub_lang] = sub_formats
1344                 return captions
1345
1346             # New captions format as of 22.06.2017
1347             player_response = args.get('player_response')
1348             if player_response and isinstance(player_response, compat_str):
1349                 player_response = self._parse_json(
1350                     player_response, video_id, fatal=False)
1351                 if player_response:
1352                     renderer = player_response['captions']['playerCaptionsTracklistRenderer']
1353                     base_url = renderer['captionTracks'][0]['baseUrl']
1354                     sub_lang_list = []
1355                     for lang in renderer['translationLanguages']:
1356                         lang_code = lang.get('languageCode')
1357                         if lang_code:
1358                             sub_lang_list.append(lang_code)
1359                     return make_captions(base_url, sub_lang_list)
1360
1361             # Some videos don't provide ttsurl but rather caption_tracks and
1362             # caption_translation_languages (e.g. 20LmZk1hakA)
1363             # Does not used anymore as of 22.06.2017
1364             caption_tracks = args['caption_tracks']
1365             caption_translation_languages = args['caption_translation_languages']
1366             caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
1367             sub_lang_list = []
1368             for lang in caption_translation_languages.split(','):
1369                 lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang))
1370                 sub_lang = lang_qs.get('lc', [None])[0]
1371                 if sub_lang:
1372                     sub_lang_list.append(sub_lang)
1373             return make_captions(caption_url, sub_lang_list)
1374         # An extractor error can be raise by the download process if there are
1375         # no automatic captions but there are subtitles
1376         except (KeyError, IndexError, ExtractorError):
1377             self._downloader.report_warning(err_msg)
1378             return {}
1379
1380     def _mark_watched(self, video_id, video_info):
1381         playback_url = video_info.get('videostats_playback_base_url', [None])[0]
1382         if not playback_url:
1383             return
1384         parsed_playback_url = compat_urlparse.urlparse(playback_url)
1385         qs = compat_urlparse.parse_qs(parsed_playback_url.query)
1386
1387         # cpn generation algorithm is reverse engineered from base.js.
1388         # In fact it works even with dummy cpn.
1389         CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
1390         cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
1391
1392         qs.update({
1393             'ver': ['2'],
1394             'cpn': [cpn],
1395         })
1396         playback_url = compat_urlparse.urlunparse(
1397             parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
1398
1399         self._download_webpage(
1400             playback_url, video_id, 'Marking watched',
1401             'Unable to mark watched', fatal=False)
1402
1403     @staticmethod
1404     def _extract_urls(webpage):
1405         # Embedded YouTube player
1406         entries = [
1407             unescapeHTML(mobj.group('url'))
1408             for mobj in re.finditer(r'''(?x)
1409             (?:
1410                 <iframe[^>]+?src=|
1411                 data-video-url=|
1412                 <embed[^>]+?src=|
1413                 embedSWF\(?:\s*|
1414                 <object[^>]+data=|
1415                 new\s+SWFObject\(
1416             )
1417             (["\'])
1418                 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1419                 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
1420             \1''', webpage)]
1421
1422         # lazyYT YouTube embed
1423         entries.extend(list(map(
1424             unescapeHTML,
1425             re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))
1426
1427         # Wordpress "YouTube Video Importer" plugin
1428         matches = re.findall(r'''(?x)<div[^>]+
1429             class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
1430             data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
1431         entries.extend(m[-1] for m in matches)
1432
1433         return entries
1434
1435     @staticmethod
1436     def _extract_url(webpage):
1437         urls = YoutubeIE._extract_urls(webpage)
1438         return urls[0] if urls else None
1439
1440     @classmethod
1441     def extract_id(cls, url):
1442         mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
1443         if mobj is None:
1444             raise ExtractorError('Invalid URL: %s' % url)
1445         video_id = mobj.group(2)
1446         return video_id
1447
1448     def _extract_annotations(self, video_id):
1449         url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id
1450         return self._download_webpage(url, video_id, note='Searching for annotations.', errnote='Unable to download video annotations.')
1451
1452     @staticmethod
1453     def _extract_chapters(description, duration):
1454         if not description:
1455             return None
1456         chapter_lines = re.findall(
1457             r'(?:^|<br\s*/>)([^<]*<a[^>]+onclick=["\']yt\.www\.watch\.player\.seekTo[^>]+>(\d{1,2}:\d{1,2}(?::\d{1,2})?)</a>[^>]*)(?=$|<br\s*/>)',
1458             description)
1459         if not chapter_lines:
1460             return None
1461         chapters = []
1462         for next_num, (chapter_line, time_point) in enumerate(
1463                 chapter_lines, start=1):
1464             start_time = parse_duration(time_point)
1465             if start_time is None:
1466                 continue
1467             if start_time > duration:
1468                 break
1469             end_time = (duration if next_num == len(chapter_lines)
1470                         else parse_duration(chapter_lines[next_num][1]))
1471             if end_time is None:
1472                 continue
1473             if end_time > duration:
1474                 end_time = duration
1475             if start_time > end_time:
1476                 break
1477             chapter_title = re.sub(
1478                 r'<a[^>]+>[^<]+</a>', '', chapter_line).strip(' \t-')
1479             chapter_title = re.sub(r'\s+', ' ', chapter_title)
1480             chapters.append({
1481                 'start_time': start_time,
1482                 'end_time': end_time,
1483                 'title': chapter_title,
1484             })
1485         return chapters
1486
1487     def _real_extract(self, url):
1488         url, smuggled_data = unsmuggle_url(url, {})
1489
1490         proto = (
1491             'http' if self._downloader.params.get('prefer_insecure', False)
1492             else 'https')
1493
1494         start_time = None
1495         end_time = None
1496         parsed_url = compat_urllib_parse_urlparse(url)
1497         for component in [parsed_url.fragment, parsed_url.query]:
1498             query = compat_parse_qs(component)
1499             if start_time is None and 't' in query:
1500                 start_time = parse_duration(query['t'][0])
1501             if start_time is None and 'start' in query:
1502                 start_time = parse_duration(query['start'][0])
1503             if end_time is None and 'end' in query:
1504                 end_time = parse_duration(query['end'][0])
1505
1506         # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1507         mobj = re.search(self._NEXT_URL_RE, url)
1508         if mobj:
1509             url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
1510         video_id = self.extract_id(url)
1511
1512         # Get video webpage
1513         url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
1514         video_webpage = self._download_webpage(url, video_id)
1515
1516         # Attempt to extract SWF player URL
1517         mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
1518         if mobj is not None:
1519             player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1520         else:
1521             player_url = None
1522
1523         dash_mpds = []
1524
1525         def add_dash_mpd(video_info):
1526             dash_mpd = video_info.get('dashmpd')
1527             if dash_mpd and dash_mpd[0] not in dash_mpds:
1528                 dash_mpds.append(dash_mpd[0])
1529
1530         is_live = None
1531         view_count = None
1532
1533         def extract_view_count(v_info):
1534             return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
1535
1536         # Get video info
1537         embed_webpage = None
1538         if re.search(r'player-age-gate-content">', video_webpage) is not None:
1539             age_gate = True
1540             # We simulate the access to the video from www.youtube.com/v/{video_id}
1541             # this can be viewed without login into Youtube
1542             url = proto + '://www.youtube.com/embed/%s' % video_id
1543             embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
1544             data = compat_urllib_parse_urlencode({
1545                 'video_id': video_id,
1546                 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1547                 'sts': self._search_regex(
1548                     r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
1549             })
1550             video_info_url = proto + '://www.youtube.com/get_video_info?' + data
1551             video_info_webpage = self._download_webpage(
1552                 video_info_url, video_id,
1553                 note='Refetching age-gated info webpage',
1554                 errnote='unable to download video info webpage')
1555             video_info = compat_parse_qs(video_info_webpage)
1556             add_dash_mpd(video_info)
1557         else:
1558             age_gate = False
1559             video_info = None
1560             sts = None
1561             # Try looking directly into the video webpage
1562             ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
1563             if ytplayer_config:
1564                 args = ytplayer_config['args']
1565                 if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
1566                     # Convert to the same format returned by compat_parse_qs
1567                     video_info = dict((k, [v]) for k, v in args.items())
1568                     add_dash_mpd(video_info)
1569                 # Rental video is not rented but preview is available (e.g.
1570                 # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
1571                 # https://github.com/rg3/youtube-dl/issues/10532)
1572                 if not video_info and args.get('ypc_vid'):
1573                     return self.url_result(
1574                         args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
1575                 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1576                     is_live = True
1577                 sts = ytplayer_config.get('sts')
1578             if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
1579                 # We also try looking in get_video_info since it may contain different dashmpd
1580                 # URL that points to a DASH manifest with possibly different itag set (some itags
1581                 # are missing from DASH manifest pointed by webpage's dashmpd, some - from DASH
1582                 # manifest pointed by get_video_info's dashmpd).
1583                 # The general idea is to take a union of itags of both DASH manifests (for example
1584                 # video with such 'manifest behavior' see https://github.com/rg3/youtube-dl/issues/6093)
1585                 self.report_video_info_webpage_download(video_id)
1586                 for el in ('info', 'embedded', 'detailpage', 'vevo', ''):
1587                     query = {
1588                         'video_id': video_id,
1589                         'ps': 'default',
1590                         'eurl': '',
1591                         'gl': 'US',
1592                         'hl': 'en',
1593                     }
1594                     if el:
1595                         query['el'] = el
1596                     if sts:
1597                         query['sts'] = sts
1598                     video_info_webpage = self._download_webpage(
1599                         '%s://www.youtube.com/get_video_info' % proto,
1600                         video_id, note=False,
1601                         errnote='unable to download video info webpage',
1602                         fatal=False, query=query)
1603                     if not video_info_webpage:
1604                         continue
1605                     get_video_info = compat_parse_qs(video_info_webpage)
1606                     add_dash_mpd(get_video_info)
1607                     if view_count is None:
1608                         view_count = extract_view_count(get_video_info)
1609                     if not video_info:
1610                         video_info = get_video_info
1611                     if 'token' in get_video_info:
1612                         # Different get_video_info requests may report different results, e.g.
1613                         # some may report video unavailability, but some may serve it without
1614                         # any complaint (see https://github.com/rg3/youtube-dl/issues/7362,
1615                         # the original webpage as well as el=info and el=embedded get_video_info
1616                         # requests report video unavailability due to geo restriction while
1617                         # el=detailpage succeeds and returns valid data). This is probably
1618                         # due to YouTube measures against IP ranges of hosting providers.
1619                         # Working around by preferring the first succeeded video_info containing
1620                         # the token if no such video_info yet was found.
1621                         if 'token' not in video_info:
1622                             video_info = get_video_info
1623                         break
1624
1625         def extract_unavailable_message():
1626             return self._html_search_regex(
1627                 r'(?s)<h1[^>]+id="unavailable-message"[^>]*>(.+?)</h1>',
1628                 video_webpage, 'unavailable message', default=None)
1629
1630         if 'token' not in video_info:
1631             if 'reason' in video_info:
1632                 if 'The uploader has not made this video available in your country.' in video_info['reason']:
1633                     regions_allowed = self._html_search_meta(
1634                         'regionsAllowed', video_webpage, default=None)
1635                     countries = regions_allowed.split(',') if regions_allowed else None
1636                     self.raise_geo_restricted(
1637                         msg=video_info['reason'][0], countries=countries)
1638                 reason = video_info['reason'][0]
1639                 if 'Invalid parameters' in reason:
1640                     unavailable_message = extract_unavailable_message()
1641                     if unavailable_message:
1642                         reason = unavailable_message
1643                 raise ExtractorError(
1644                     'YouTube said: %s' % reason,
1645                     expected=True, video_id=video_id)
1646             else:
1647                 raise ExtractorError(
1648                     '"token" parameter not in video info for unknown reason',
1649                     video_id=video_id)
1650
1651         # title
1652         if 'title' in video_info:
1653             video_title = video_info['title'][0]
1654         else:
1655             self._downloader.report_warning('Unable to extract video title')
1656             video_title = '_'
1657
1658         # description
1659         description_original = video_description = get_element_by_id("eow-description", video_webpage)
1660         if video_description:
1661
1662             def replace_url(m):
1663                 redir_url = compat_urlparse.urljoin(url, m.group(1))
1664                 parsed_redir_url = compat_urllib_parse_urlparse(redir_url)
1665                 if re.search(r'^(?:www\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)$', parsed_redir_url.netloc) and parsed_redir_url.path == '/redirect':
1666                     qs = compat_parse_qs(parsed_redir_url.query)
1667                     q = qs.get('q')
1668                     if q and q[0]:
1669                         return q[0]
1670                 return redir_url
1671
1672             description_original = video_description = re.sub(r'''(?x)
1673                 <a\s+
1674                     (?:[a-zA-Z-]+="[^"]*"\s+)*?
1675                     (?:title|href)="([^"]+)"\s+
1676                     (?:[a-zA-Z-]+="[^"]*"\s+)*?
1677                     class="[^"]*"[^>]*>
1678                 [^<]+\.{3}\s*
1679                 </a>
1680             ''', replace_url, video_description)
1681             video_description = clean_html(video_description)
1682         else:
1683             fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
1684             if fd_mobj:
1685                 video_description = unescapeHTML(fd_mobj.group(1))
1686             else:
1687                 video_description = ''
1688
1689         if 'multifeed_metadata_list' in video_info and not smuggled_data.get('force_singlefeed', False):
1690             if not self._downloader.params.get('noplaylist'):
1691                 entries = []
1692                 feed_ids = []
1693                 multifeed_metadata_list = video_info['multifeed_metadata_list'][0]
1694                 for feed in multifeed_metadata_list.split(','):
1695                     # Unquote should take place before split on comma (,) since textual
1696                     # fields may contain comma as well (see
1697                     # https://github.com/rg3/youtube-dl/issues/8536)
1698                     feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
1699                     entries.append({
1700                         '_type': 'url_transparent',
1701                         'ie_key': 'Youtube',
1702                         'url': smuggle_url(
1703                             '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
1704                             {'force_singlefeed': True}),
1705                         'title': '%s (%s)' % (video_title, feed_data['title'][0]),
1706                     })
1707                     feed_ids.append(feed_data['id'][0])
1708                 self.to_screen(
1709                     'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
1710                     % (', '.join(feed_ids), video_id))
1711                 return self.playlist_result(entries, video_id, video_title, video_description)
1712             self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
1713
1714         if view_count is None:
1715             view_count = extract_view_count(video_info)
1716
1717         # Check for "rental" videos
1718         if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
1719             raise ExtractorError('"rental" videos not supported. See https://github.com/rg3/youtube-dl/issues/359 for more information.', expected=True)
1720
1721         def _extract_filesize(media_url):
1722             return int_or_none(self._search_regex(
1723                 r'\bclen[=/](\d+)', media_url, 'filesize', default=None))
1724
1725         if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1726             self.report_rtmp_download()
1727             formats = [{
1728                 'format_id': '_rtmp',
1729                 'protocol': 'rtmp',
1730                 'url': video_info['conn'][0],
1731                 'player_url': player_url,
1732             }]
1733         elif not is_live and (len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1):
1734             encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
1735             if 'rtmpe%3Dyes' in encoded_url_map:
1736                 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
1737             formats_spec = {}
1738             fmt_list = video_info.get('fmt_list', [''])[0]
1739             if fmt_list:
1740                 for fmt in fmt_list.split(','):
1741                     spec = fmt.split('/')
1742                     if len(spec) > 1:
1743                         width_height = spec[1].split('x')
1744                         if len(width_height) == 2:
1745                             formats_spec[spec[0]] = {
1746                                 'resolution': spec[1],
1747                                 'width': int_or_none(width_height[0]),
1748                                 'height': int_or_none(width_height[1]),
1749                             }
1750             q = qualities(['small', 'medium', 'hd720'])
1751             formats = []
1752             for url_data_str in encoded_url_map.split(','):
1753                 url_data = compat_parse_qs(url_data_str)
1754                 if 'itag' not in url_data or 'url' not in url_data:
1755                     continue
1756                 format_id = url_data['itag'][0]
1757                 url = url_data['url'][0]
1758
1759                 if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
1760                     ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
1761                     jsplayer_url_json = self._search_regex(
1762                         ASSETS_RE,
1763                         embed_webpage if age_gate else video_webpage,
1764                         'JS player URL (1)', default=None)
1765                     if not jsplayer_url_json and not age_gate:
1766                         # We need the embed website after all
1767                         if embed_webpage is None:
1768                             embed_url = proto + '://www.youtube.com/embed/%s' % video_id
1769                             embed_webpage = self._download_webpage(
1770                                 embed_url, video_id, 'Downloading embed webpage')
1771                         jsplayer_url_json = self._search_regex(
1772                             ASSETS_RE, embed_webpage, 'JS player URL')
1773
1774                     player_url = json.loads(jsplayer_url_json)
1775                     if player_url is None:
1776                         player_url_json = self._search_regex(
1777                             r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
1778                             video_webpage, 'age gate player URL')
1779                         player_url = json.loads(player_url_json)
1780
1781                 if 'sig' in url_data:
1782                     url += '&signature=' + url_data['sig'][0]
1783                 elif 's' in url_data:
1784                     encrypted_sig = url_data['s'][0]
1785
1786                     if self._downloader.params.get('verbose'):
1787                         if player_url is None:
1788                             player_version = 'unknown'
1789                             player_desc = 'unknown'
1790                         else:
1791                             if player_url.endswith('swf'):
1792                                 player_version = self._search_regex(
1793                                     r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
1794                                     'flash player', fatal=False)
1795                                 player_desc = 'flash player %s' % player_version
1796                             else:
1797                                 player_version = self._search_regex(
1798                                     [r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js',
1799                                      r'(?:www|player)-([^/]+)(?:/[a-z]{2}_[A-Z]{2})?/base\.js'],
1800                                     player_url,
1801                                     'html5 player', fatal=False)
1802                                 player_desc = 'html5 player %s' % player_version
1803
1804                         parts_sizes = self._signature_cache_id(encrypted_sig)
1805                         self.to_screen('{%s} signature length %s, %s' %
1806                                        (format_id, parts_sizes, player_desc))
1807
1808                     signature = self._decrypt_signature(
1809                         encrypted_sig, video_id, player_url, age_gate)
1810                     url += '&signature=' + signature
1811                 if 'ratebypass' not in url:
1812                     url += '&ratebypass=yes'
1813
1814                 dct = {
1815                     'format_id': format_id,
1816                     'url': url,
1817                     'player_url': player_url,
1818                 }
1819                 if format_id in self._formats:
1820                     dct.update(self._formats[format_id])
1821                 if format_id in formats_spec:
1822                     dct.update(formats_spec[format_id])
1823
1824                 # Some itags are not included in DASH manifest thus corresponding formats will
1825                 # lack metadata (see https://github.com/rg3/youtube-dl/pull/5993).
1826                 # Trying to extract metadata from url_encoded_fmt_stream_map entry.
1827                 mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
1828                 width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
1829
1830                 filesize = int_or_none(url_data.get(
1831                     'clen', [None])[0]) or _extract_filesize(url)
1832
1833                 quality = url_data.get('quality_label', [None])[0] or url_data.get('quality', [None])[0]
1834
1835                 more_fields = {
1836                     'filesize': filesize,
1837                     'tbr': float_or_none(url_data.get('bitrate', [None])[0], 1000),
1838                     'width': width,
1839                     'height': height,
1840                     'fps': int_or_none(url_data.get('fps', [None])[0]),
1841                     'format_note': quality,
1842                     'quality': q(quality),
1843                 }
1844                 for key, value in more_fields.items():
1845                     if value:
1846                         dct[key] = value
1847                 type_ = url_data.get('type', [None])[0]
1848                 if type_:
1849                     type_split = type_.split(';')
1850                     kind_ext = type_split[0].split('/')
1851                     if len(kind_ext) == 2:
1852                         kind, _ = kind_ext
1853                         dct['ext'] = mimetype2ext(type_split[0])
1854                         if kind in ('audio', 'video'):
1855                             codecs = None
1856                             for mobj in re.finditer(
1857                                     r'(?P<key>[a-zA-Z_-]+)=(?P<quote>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):
1858                                 if mobj.group('key') == 'codecs':
1859                                     codecs = mobj.group('val')
1860                                     break
1861                             if codecs:
1862                                 dct.update(parse_codecs(codecs))
1863                 if dct.get('acodec') == 'none' or dct.get('vcodec') == 'none':
1864                     dct['downloader_options'] = {
1865                         # Youtube throttles chunks >~10M
1866                         'http_chunk_size': 10485760,
1867                     }
1868                 formats.append(dct)
1869         elif video_info.get('hlsvp'):
1870             manifest_url = video_info['hlsvp'][0]
1871             formats = []
1872             m3u8_formats = self._extract_m3u8_formats(
1873                 manifest_url, video_id, 'mp4', fatal=False)
1874             for a_format in m3u8_formats:
1875                 itag = self._search_regex(
1876                     r'/itag/(\d+)/', a_format['url'], 'itag', default=None)
1877                 if itag:
1878                     a_format['format_id'] = itag
1879                     if itag in self._formats:
1880                         dct = self._formats[itag].copy()
1881                         dct.update(a_format)
1882                         a_format = dct
1883                 a_format['player_url'] = player_url
1884                 # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
1885                 a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
1886                 formats.append(a_format)
1887         else:
1888             error_message = clean_html(video_info.get('reason', [None])[0])
1889             if not error_message:
1890                 error_message = extract_unavailable_message()
1891             if error_message:
1892                 raise ExtractorError(error_message, expected=True)
1893             raise ExtractorError('no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
1894
1895         # uploader
1896         video_uploader = try_get(video_info, lambda x: x['author'][0], compat_str)
1897         if video_uploader:
1898             video_uploader = compat_urllib_parse_unquote_plus(video_uploader)
1899         else:
1900             self._downloader.report_warning('unable to extract uploader name')
1901
1902         # uploader_id
1903         video_uploader_id = None
1904         video_uploader_url = None
1905         mobj = re.search(
1906             r'<link itemprop="url" href="(?P<uploader_url>https?://www\.youtube\.com/(?:user|channel)/(?P<uploader_id>[^"]+))">',
1907             video_webpage)
1908         if mobj is not None:
1909             video_uploader_id = mobj.group('uploader_id')
1910             video_uploader_url = mobj.group('uploader_url')
1911         else:
1912             self._downloader.report_warning('unable to extract uploader nickname')
1913
1914         channel_id = self._html_search_meta(
1915             'channelId', video_webpage, 'channel id')
1916         channel_url = 'http://www.youtube.com/channel/%s' % channel_id if channel_id else None
1917
1918         # thumbnail image
1919         # We try first to get a high quality image:
1920         m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
1921                             video_webpage, re.DOTALL)
1922         if m_thumb is not None:
1923             video_thumbnail = m_thumb.group(1)
1924         elif 'thumbnail_url' not in video_info:
1925             self._downloader.report_warning('unable to extract video thumbnail')
1926             video_thumbnail = None
1927         else:   # don't panic if we can't find it
1928             video_thumbnail = compat_urllib_parse_unquote_plus(video_info['thumbnail_url'][0])
1929
1930         # upload date
1931         upload_date = self._html_search_meta(
1932             'datePublished', video_webpage, 'upload date', default=None)
1933         if not upload_date:
1934             upload_date = self._search_regex(
1935                 [r'(?s)id="eow-date.*?>(.*?)</span>',
1936                  r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'],
1937                 video_webpage, 'upload date', default=None)
1938         upload_date = unified_strdate(upload_date)
1939
1940         video_license = self._html_search_regex(
1941             r'<h4[^>]+class="title"[^>]*>\s*License\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li',
1942             video_webpage, 'license', default=None)
1943
1944         m_music = re.search(
1945             r'''(?x)
1946                 <h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*
1947                 <ul[^>]*>\s*
1948                 <li>(?P<title>.+?)
1949                 by (?P<creator>.+?)
1950                 (?:
1951                     \(.+?\)|
1952                     <a[^>]*
1953                         (?:
1954                             \bhref=["\']/red[^>]*>|             # drop possible
1955                             >\s*Listen ad-free with YouTube Red # YouTube Red ad
1956                         )
1957                     .*?
1958                 )?</li
1959             ''',
1960             video_webpage)
1961         if m_music:
1962             video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
1963             video_creator = clean_html(m_music.group('creator'))
1964         else:
1965             video_alt_title = video_creator = None
1966
1967         def extract_meta(field):
1968             return self._html_search_regex(
1969                 r'<h4[^>]+class="title"[^>]*>\s*%s\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li>\s*' % field,
1970                 video_webpage, field, default=None)
1971
1972         track = extract_meta('Song')
1973         artist = extract_meta('Artist')
1974
1975         m_episode = re.search(
1976             r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
1977             video_webpage)
1978         if m_episode:
1979             series = m_episode.group('series')
1980             season_number = int(m_episode.group('season'))
1981             episode_number = int(m_episode.group('episode'))
1982         else:
1983             series = season_number = episode_number = None
1984
1985         m_cat_container = self._search_regex(
1986             r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
1987             video_webpage, 'categories', default=None)
1988         if m_cat_container:
1989             category = self._html_search_regex(
1990                 r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
1991                 default=None)
1992             video_categories = None if category is None else [category]
1993         else:
1994             video_categories = None
1995
1996         video_tags = [
1997             unescapeHTML(m.group('content'))
1998             for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
1999
2000         def _extract_count(count_name):
2001             return str_to_int(self._search_regex(
2002                 r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>'
2003                 % re.escape(count_name),
2004                 video_webpage, count_name, default=None))
2005
2006         like_count = _extract_count('like')
2007         dislike_count = _extract_count('dislike')
2008
2009         # subtitles
2010         video_subtitles = self.extract_subtitles(video_id, video_webpage)
2011         automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
2012
2013         video_duration = try_get(
2014             video_info, lambda x: int_or_none(x['length_seconds'][0]))
2015         if not video_duration:
2016             video_duration = parse_duration(self._html_search_meta(
2017                 'duration', video_webpage, 'video duration'))
2018
2019         # annotations
2020         video_annotations = None
2021         if self._downloader.params.get('writeannotations', False):
2022             video_annotations = self._extract_annotations(video_id)
2023
2024         chapters = self._extract_chapters(description_original, video_duration)
2025
2026         # Look for the DASH manifest
2027         if self._downloader.params.get('youtube_include_dash_manifest', True):
2028             dash_mpd_fatal = True
2029             for mpd_url in dash_mpds:
2030                 dash_formats = {}
2031                 try:
2032                     def decrypt_sig(mobj):
2033                         s = mobj.group(1)
2034                         dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
2035                         return '/signature/%s' % dec_s
2036
2037                     mpd_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, mpd_url)
2038
2039                     for df in self._extract_mpd_formats(
2040                             mpd_url, video_id, fatal=dash_mpd_fatal,
2041                             formats_dict=self._formats):
2042                         if not df.get('filesize'):
2043                             df['filesize'] = _extract_filesize(df['url'])
2044                         # Do not overwrite DASH format found in some previous DASH manifest
2045                         if df['format_id'] not in dash_formats:
2046                             dash_formats[df['format_id']] = df
2047                         # Additional DASH manifests may end up in HTTP Error 403 therefore
2048                         # allow them to fail without bug report message if we already have
2049                         # some DASH manifest succeeded. This is temporary workaround to reduce
2050                         # burst of bug reports until we figure out the reason and whether it
2051                         # can be fixed at all.
2052                         dash_mpd_fatal = False
2053                 except (ExtractorError, KeyError) as e:
2054                     self.report_warning(
2055                         'Skipping DASH manifest: %r' % e, video_id)
2056                 if dash_formats:
2057                     # Remove the formats we found through non-DASH, they
2058                     # contain less info and it can be wrong, because we use
2059                     # fixed values (for example the resolution). See
2060                     # https://github.com/rg3/youtube-dl/issues/5774 for an
2061                     # example.
2062                     formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
2063                     formats.extend(dash_formats.values())
2064
2065         # Check for malformed aspect ratio
2066         stretched_m = re.search(
2067             r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
2068             video_webpage)
2069         if stretched_m:
2070             w = float(stretched_m.group('w'))
2071             h = float(stretched_m.group('h'))
2072             # yt:stretch may hold invalid ratio data (e.g. for Q39EVAstoRM ratio is 17:0).
2073             # We will only process correct ratios.
2074             if w > 0 and h > 0:
2075                 ratio = w / h
2076                 for f in formats:
2077                     if f.get('vcodec') != 'none':
2078                         f['stretched_ratio'] = ratio
2079
2080         self._sort_formats(formats)
2081
2082         self.mark_watched(video_id, video_info)
2083
2084         return {
2085             'id': video_id,
2086             'uploader': video_uploader,
2087             'uploader_id': video_uploader_id,
2088             'uploader_url': video_uploader_url,
2089             'channel_id': channel_id,
2090             'channel_url': channel_url,
2091             'upload_date': upload_date,
2092             'license': video_license,
2093             'creator': video_creator or artist,
2094             'title': video_title,
2095             'alt_title': video_alt_title or track,
2096             'thumbnail': video_thumbnail,
2097             'description': video_description,
2098             'categories': video_categories,
2099             'tags': video_tags,
2100             'subtitles': video_subtitles,
2101             'automatic_captions': automatic_captions,
2102             'duration': video_duration,
2103             'age_limit': 18 if age_gate else 0,
2104             'annotations': video_annotations,
2105             'chapters': chapters,
2106             'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
2107             'view_count': view_count,
2108             'like_count': like_count,
2109             'dislike_count': dislike_count,
2110             'average_rating': float_or_none(video_info.get('avg_rating', [None])[0]),
2111             'formats': formats,
2112             'is_live': is_live,
2113             'start_time': start_time,
2114             'end_time': end_time,
2115             'series': series,
2116             'season_number': season_number,
2117             'episode_number': episode_number,
2118             'track': track,
2119             'artist': artist,
2120         }
2121
2122
class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
    """Extractor for YouTube playlists.

    Accepts playlist pages, watch/embed/youtu.be URLs that carry a playlist
    query parameter, `/p/<id>` URLs and bare playlist ids.
    """
    IE_DESC = 'YouTube.com playlists'
    # Two alternatives: a URL form whose playlist id lands in group 1, or a
    # bare playlist id (built from YoutubeBaseInfoExtractor._PLAYLIST_ID_RE)
    # that lands in group 2. _real_extract reads `group(1) or group(2)`.
    _VALID_URL = r"""(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            youtube\.com/
                            (?:
                               (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/(?:videoseries|[0-9A-Za-z_-]{11}))
                               \? (?:.*?[&;])*? (?:p|a|list)=
                            |  p/
                            )|
                            youtu\.be/[0-9A-Za-z_-]{11}\?.*?\blist=
                        )
                        (
                            (?:PL|LL|EC|UU|FL|RD|UL|TL|OLAK5uy_)?[0-9A-Za-z-_]{10,}
                            # Top tracks, they can also include dots
                            |(?:MC)[\w\.]*
                        )
                        .*
                     |
                        (%(playlist_id)s)
                     )""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    # Playlist page URL; _extract_playlist fills in the playlist id.
    _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
    # Matches one playlist entry link: video id, index and (optionally) title.
    # NOTE(review): presumably consumed by the base class when it scans pages
    # for entries; the consumer is not visible in this part of the file.
    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&amp;[^"]*?index=(?P<index>\d+)(?:[^>]+>(?P<title>[^<]+))?'
    IE_NAME = 'youtube:playlist'
2149     _TESTS = [{
2150         'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
2151         'info_dict': {
2152             'title': 'ytdl test PL',
2153             'id': 'PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
2154         },
2155         'playlist_count': 3,
2156     }, {
2157         'url': 'https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
2158         'info_dict': {
2159             'id': 'PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
2160             'title': 'YDL_Empty_List',
2161         },
2162         'playlist_count': 0,
2163         'skip': 'This playlist is private',
2164     }, {
2165         'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
2166         'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2167         'info_dict': {
2168             'title': '29C3: Not my department',
2169             'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2170         },
2171         'playlist_count': 95,
2172     }, {
2173         'note': 'issue #673',
2174         'url': 'PLBB231211A4F62143',
2175         'info_dict': {
2176             'title': '[OLD]Team Fortress 2 (Class-based LP)',
2177             'id': 'PLBB231211A4F62143',
2178         },
2179         'playlist_mincount': 26,
2180     }, {
2181         'note': 'Large playlist',
2182         'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
2183         'info_dict': {
2184             'title': 'Uploads from Cauchemar',
2185             'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
2186         },
2187         'playlist_mincount': 799,
2188     }, {
2189         'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
2190         'info_dict': {
2191             'title': 'YDL_safe_search',
2192             'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
2193         },
2194         'playlist_count': 2,
2195         'skip': 'This playlist is private',
2196     }, {
2197         'note': 'embedded',
2198         'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
2199         'playlist_count': 4,
2200         'info_dict': {
2201             'title': 'JODA15',
2202             'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
2203         }
2204     }, {
2205         'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
2206         'playlist_mincount': 485,
2207         'info_dict': {
2208             'title': '2017 華語最新單曲 (2/24更新)',
2209             'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
2210         }
2211     }, {
2212         'note': 'Embedded SWF player',
2213         'url': 'https://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
2214         'playlist_count': 4,
2215         'info_dict': {
2216             'title': 'JODA7',
2217             'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ',
2218         }
2219     }, {
2220         'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
2221         'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
2222         'info_dict': {
2223             'title': 'Uploads from Interstellar Movie',
2224             'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
2225         },
2226         'playlist_mincount': 21,
2227     }, {
2228         # Playlist URL that does not actually serve a playlist
2229         'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
2230         'info_dict': {
2231             'id': 'FqZTN594JQw',
2232             'ext': 'webm',
2233             'title': "Smiley's People 01 detective, Adventure Series, Action",
2234             'uploader': 'STREEM',
2235             'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
2236             'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
2237             'upload_date': '20150526',
2238             'license': 'Standard YouTube License',
2239             'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
2240             'categories': ['People & Blogs'],
2241             'tags': list,
2242             'like_count': int,
2243             'dislike_count': int,
2244         },
2245         'params': {
2246             'skip_download': True,
2247         },
2248         'add_ie': [YoutubeIE.ie_key()],
2249     }, {
2250         'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
2251         'info_dict': {
2252             'id': 'yeWKywCrFtk',
2253             'ext': 'mp4',
2254             'title': 'Small Scale Baler and Braiding Rugs',
2255             'uploader': 'Backus-Page House Museum',
2256             'uploader_id': 'backuspagemuseum',
2257             'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
2258             'upload_date': '20161008',
2259             'license': 'Standard YouTube License',
2260             'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
2261             'categories': ['Nonprofits & Activism'],
2262             'tags': list,
2263             'like_count': int,
2264             'dislike_count': int,
2265         },
2266         'params': {
2267             'noplaylist': True,
2268             'skip_download': True,
2269         },
2270     }, {
2271         'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
2272         'only_matching': True,
2273     }, {
2274         'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
2275         'only_matching': True,
2276     }, {
2277         # music album playlist
2278         'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
2279         'only_matching': True,
2280     }]
2281
    def _real_initialize(self):
        """Initialization hook: run the login step via `self._login()`."""
        self._login()
2284
2285     def _extract_mix(self, playlist_id):
2286         # The mixes are generated from a single video
2287         # the id of the playlist is just 'RD' + video_id
2288         ids = []
2289         last_id = playlist_id[-11:]
2290         for n in itertools.count(1):
2291             url = 'https://youtube.com/watch?v=%s&list=%s' % (last_id, playlist_id)
2292             webpage = self._download_webpage(
2293                 url, playlist_id, 'Downloading page {0} of Youtube mix'.format(n))
2294             new_ids = orderedSet(re.findall(
2295                 r'''(?xs)data-video-username=".*?".*?
2296                            href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id),
2297                 webpage))
2298             # Fetch new pages until all the videos are repeated, it seems that
2299             # there are always 51 unique videos.
2300             new_ids = [_id for _id in new_ids if _id not in ids]
2301             if not new_ids:
2302                 break
2303             ids.extend(new_ids)
2304             last_id = ids[-1]
2305
2306         url_results = self._ids_to_results(ids)
2307
2308         search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage)
2309         title_span = (
2310             search_title('playlist-title') or
2311             search_title('title long-title') or
2312             search_title('title'))
2313         title = clean_html(title_span)
2314
2315         return self.playlist_result(url_results, playlist_id, title)
2316
2317     def _extract_playlist(self, playlist_id):
2318         url = self._TEMPLATE_URL % playlist_id
2319         page = self._download_webpage(url, playlist_id)
2320
2321         # the yt-alert-message now has tabindex attribute (see https://github.com/rg3/youtube-dl/issues/11604)
2322         for match in re.findall(r'<div class="yt-alert-message"[^>]*>([^<]+)</div>', page):
2323             match = match.strip()
2324             # Check if the playlist exists or is private
2325             mobj = re.match(r'[^<]*(?:The|This) playlist (?P<reason>does not exist|is private)[^<]*', match)
2326             if mobj:
2327                 reason = mobj.group('reason')
2328                 message = 'This playlist %s' % reason
2329                 if 'private' in reason:
2330                     message += ', use --username or --netrc to access it'
2331                 message += '.'
2332                 raise ExtractorError(message, expected=True)
2333             elif re.match(r'[^<]*Invalid parameters[^<]*', match):
2334                 raise ExtractorError(
2335                     'Invalid parameters. Maybe URL is incorrect.',
2336                     expected=True)
2337             elif re.match(r'[^<]*Choose your language[^<]*', match):
2338                 continue
2339             else:
2340                 self.report_warning('Youtube gives an alert message: ' + match)
2341
2342         playlist_title = self._html_search_regex(
2343             r'(?s)<h1 class="pl-header-title[^"]*"[^>]*>\s*(.*?)\s*</h1>',
2344             page, 'title', default=None)
2345
2346         _UPLOADER_BASE = r'class=["\']pl-header-details[^>]+>\s*<li>\s*<a[^>]+\bhref='
2347         uploader = self._search_regex(
2348             r'%s["\']/(?:user|channel)/[^>]+>([^<]+)' % _UPLOADER_BASE,
2349             page, 'uploader', default=None)
2350         mobj = re.search(
2351             r'%s(["\'])(?P<path>/(?:user|channel)/(?P<uploader_id>.+?))\1' % _UPLOADER_BASE,
2352             page)
2353         if mobj:
2354             uploader_id = mobj.group('uploader_id')
2355             uploader_url = compat_urlparse.urljoin(url, mobj.group('path'))
2356         else:
2357             uploader_id = uploader_url = None
2358
2359         has_videos = True
2360
2361         if not playlist_title:
2362             try:
2363                 # Some playlist URLs don't actually serve a playlist (e.g.
2364                 # https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4)
2365                 next(self._entries(page, playlist_id))
2366             except StopIteration:
2367                 has_videos = False
2368
2369         playlist = self.playlist_result(
2370             self._entries(page, playlist_id), playlist_id, playlist_title)
2371         playlist.update({
2372             'uploader': uploader,
2373             'uploader_id': uploader_id,
2374             'uploader_url': uploader_url,
2375         })
2376
2377         return has_videos, playlist
2378
2379     def _check_download_just_video(self, url, playlist_id):
2380         # Check if it's a video-specific URL
2381         query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
2382         video_id = query_dict.get('v', [None])[0] or self._search_regex(
2383             r'(?:(?:^|//)youtu\.be/|youtube\.com/embed/(?!videoseries))([0-9A-Za-z_-]{11})', url,
2384             'video id', default=None)
2385         if video_id:
2386             if self._downloader.params.get('noplaylist'):
2387                 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
2388                 return video_id, self.url_result(video_id, 'Youtube', video_id=video_id)
2389             else:
2390                 self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
2391                 return video_id, None
2392         return None, None
2393
2394     def _real_extract(self, url):
2395         # Extract playlist id
2396         mobj = re.match(self._VALID_URL, url)
2397         if mobj is None:
2398             raise ExtractorError('Invalid URL: %s' % url)
2399         playlist_id = mobj.group(1) or mobj.group(2)
2400
2401         video_id, video = self._check_download_just_video(url, playlist_id)
2402         if video:
2403             return video
2404
2405         if playlist_id.startswith(('RD', 'UL', 'PU')):
2406             # Mixes require a custom extraction process
2407             return self._extract_mix(playlist_id)
2408
2409         has_videos, playlist = self._extract_playlist(playlist_id)
2410         if has_videos or not video_id:
2411             return playlist
2412
2413         # Some playlist URLs don't actually serve a playlist (see
2414         # https://github.com/rg3/youtube-dl/issues/10537).
2415         # Fallback to plain video extraction if there is a video id
2416         # along with playlist id.
2417         return self.url_result(video_id, 'Youtube', video_id=video_id)
2418
2419
class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
    """Extractor for YouTube channel URLs (`/channel/<id>`).

    Where possible the channel is handed over to the playlist extractor via
    its uploads playlist ('UC...' id rewritten to 'UU...'); otherwise the
    channel's videos page is scraped directly.
    """
    IE_DESC = 'YouTube.com channels'
    _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/(?P<id>[0-9A-Za-z_-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos'
    _VIDEO_RE = r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?'
    IE_NAME = 'youtube:channel'
    _TESTS = [{
        'note': 'paginated channel',
        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
        'playlist_mincount': 91,
        'info_dict': {
            'id': 'UUKfVa3S1e4PHvxWcwyMMg8w',
            'title': 'Uploads from lex will',
        }
    }, {
        'note': 'Age restricted channel',
        # from https://www.youtube.com/user/DeusExOfficial
        'url': 'https://www.youtube.com/channel/UCs0ifCMCm1icqRbqhUINa0w',
        'playlist_mincount': 64,
        'info_dict': {
            'id': 'UUs0ifCMCm1icqRbqhUINa0w',
            'title': 'Uploads from Deus Ex',
        },
    }]

    @classmethod
    def suitable(cls, url):
        """Decline URLs that the playlists or live extractors accept."""
        if YoutubePlaylistsIE.suitable(url) or YoutubeLiveIE.suitable(url):
            return False
        return super(YoutubeChannelIE, cls).suitable(url)

    def _build_template_url(self, url, channel_id):
        """Return the videos-page URL for `channel_id` (`url` is unused here)."""
        return self._TEMPLATE_URL % channel_id

    def _real_extract(self, url):
        """Extract a channel's videos as a playlist result or a redirect.

        Returns a url_result pointing at the uploads playlist when a 'UC...'
        channel id can be found, otherwise a playlist result built from the
        channel page itself.

        Raises ExtractorError (expected) when the channel page has no
        entries and shows an alert message.
        """
        channel_id = self._match_id(url)
        url = self._build_template_url(url, channel_id)

        # Channel by page listing is restricted to 35 pages of 30 items, i.e. 1050 videos total (see #5778)
        # Workaround by extracting as a playlist if managed to obtain channel playlist URL
        # otherwise fallback on channel by page extraction
        listing_page = self._download_webpage(
            url + '?view=57', channel_id,
            'Downloading channel page', fatal=False)

        uploads_owner_id = False
        if listing_page is not False:
            uploads_owner_id = self._html_search_meta(
                'channelId', listing_page, 'channel id', default=None)
            if not uploads_owner_id:
                # Fall back to the app deep-link metadata.
                app_url = self._html_search_meta(
                    ('al:ios:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad'),
                    listing_page, 'channel url', default=None)
                if app_url:
                    uploads_owner_id = self._search_regex(
                        r'vnd\.youtube://user/([0-9A-Za-z_-]+)',
                        app_url, 'channel id', default=None)

        if uploads_owner_id and uploads_owner_id.startswith('UC'):
            uploads_playlist_id = 'UU' + uploads_owner_id[2:]
            playlist_url = compat_urlparse.urljoin(
                url, '/playlist?list=%s' % uploads_playlist_id)
            return self.url_result(playlist_url, 'YoutubePlaylist')

        channel_page = self._download_webpage(url, channel_id, 'Downloading page #1')
        autogenerated_marker = re.search(r'''(?x)
                class="[^"]*?(?:
                    channel-header-autogenerated-label|
                    yt-channel-title-autogenerated
                )[^"]*"''', channel_page)

        if autogenerated_marker is not None:
            # The videos are contained in a single page
            # the ajax pages can't be used, they are empty
            entries = []
            for video_id, video_title in self.extract_videos_from_page(channel_page):
                entries.append(self.url_result(
                    video_id, 'Youtube', video_id=video_id,
                    video_title=video_title))
            return self.playlist_result(entries, channel_id)

        # Probe for a first entry; an empty channel page may carry an alert
        # explaining why nothing is available.
        try:
            next(self._entries(channel_page, channel_id))
        except StopIteration:
            alert_message = self._html_search_regex(
                r'(?s)<div[^>]+class=(["\']).*?\byt-alert-message\b.*?\1[^>]*>(?P<alert>[^<]+)</div>',
                channel_page, 'alert', default=None, group='alert')
            if alert_message:
                raise ExtractorError('Youtube said: %s' % alert_message, expected=True)

        return self.playlist_result(self._entries(channel_page, channel_id), channel_id)
2509
2510
class YoutubeUserIE(YoutubeChannelIE):
    """Channel-style extractor for user URLs (/user/, /c/, bare name) and the "ytuser:" keyword."""
    IE_NAME = 'youtube:user'
    IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:(?P<user>user|c)/)?(?!(?:attribution_link|watch|results|shared)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/%s/%s/videos'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheLinuxFoundation',
        'playlist_mincount': 320,
        'info_dict': {
            'id': 'UUfX55Sx5hEFjoC3cNs6mCUQ',
            'title': 'Uploads from The Linux Foundation',
        }
    }, {
        # Only available via https://www.youtube.com/c/12minuteathlete/videos
        # but not https://www.youtube.com/user/12minuteathlete/videos
        'url': 'https://www.youtube.com/c/12minuteathlete/videos',
        'playlist_mincount': 249,
        'info_dict': {
            'id': 'UUVjM-zV6_opMDx7WYxnjZiQ',
            'title': 'Uploads from 12 Minute Athlete',
        }
    }, {
        'url': 'ytuser:phihag',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/gametrailers',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/gametrailers',
        'only_matching': True,
    }, {
        # This channel is not available, geo restricted to JP
        'url': 'https://www.youtube.com/user/kananishinoSMEJ/videos',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Accept ``url`` only when no other Youtube*IE class in this module does.

        _VALID_URL is too permissive on its own and would also match URLs
        that another YouTube extractor can handle, so every other module-level
        class named ``Youtube...IE`` is consulted first.
        """
        for name, klass in globals().items():
            if klass is cls:
                continue
            if name.startswith('Youtube') and name.endswith('IE') and klass.suitable(url):
                return False
        return super(YoutubeUserIE, cls).suitable(url)

    def _build_template_url(self, url, channel_id):
        """Fill _TEMPLATE_URL from the parts of ``url`` matched by _VALID_URL.

        ``channel_id`` is not used here. When the URL has no explicit
        ``user``/``c`` path component the 'user' form is assumed.
        """
        match = re.match(self._VALID_URL, url)
        path_kind = match.group('user') or 'user'
        return self._TEMPLATE_URL % (path_kind, match.group('id'))
2561
2562
class YoutubeLiveIE(YoutubeBaseInfoExtractor):
    """Resolve a ``.../live`` URL to the video it shows, or fall back to the base URL."""
    IE_NAME = 'youtube:live'
    IE_DESC = 'YouTube.com live streams'
    _VALID_URL = r'(?P<base_url>https?://(?:\w+\.)?youtube\.com/(?:(?:user|channel|c)/)?(?P<id>[^/]+))/live'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheYoungTurks/live',
        'info_dict': {
            'id': 'a48o2S1cPoo',
            'ext': 'mp4',
            'title': 'The Young Turks - Live Main Show',
            'uploader': 'The Young Turks',
            'uploader_id': 'TheYoungTurks',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
            'upload_date': '20150715',
            'license': 'Standard YouTube License',
            'description': 'md5:438179573adcdff3c97ebb1ee632b891',
            'categories': ['News & Politics'],
            'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/TheYoungTurks/live',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a url_result for the live video, or for the base URL if none is found.

        The page download is non-fatal; any failure simply leads to the
        base-URL fallback.
        """
        match = re.match(self._VALID_URL, url)
        channel_id, base_url = match.group('id'), match.group('base_url')

        webpage = self._download_webpage(url, channel_id, fatal=False)
        if not webpage:
            return self.url_result(base_url)

        page_type = self._og_search_property(
            'type', webpage, 'page type', default='')
        video_id = self._html_search_meta(
            'videoId', webpage, 'video id', default=None)

        # Only trust the meta tag when the page is a video page and the id
        # has the 11-character video id shape.
        is_video_page = (
            page_type.startswith('video')
            and video_id
            and re.match(r'^[0-9A-Za-z_-]{11}$', video_id))
        if is_video_page:
            return self.url_result(video_id, YoutubeIE.ie_key())
        return self.url_result(base_url)
2613
2614
class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
    """Matches the ``/playlists`` page of a user or channel; extraction is inherited."""
    IE_NAME = 'youtube:playlists'
    IE_DESC = 'YouTube.com user/channel playlists'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/(?:user|channel)/(?P<id>[^/]+)/playlists'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
        'playlist_mincount': 4,
        'info_dict': {
            'id': 'ThirstForScience',
            'title': 'Thirst for Science',
        },
    }, {
        # with "Load more" button
        'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
        'playlist_mincount': 70,
        'info_dict': {
            'id': 'igorkle1',
            'title': 'Игорь Клейнер',
        },
    }, {
        'url': 'https://www.youtube.com/channel/UCiU1dHvZObB2iP6xkJ__Icw/playlists',
        'playlist_mincount': 17,
        'info_dict': {
            'id': 'UCiU1dHvZObB2iP6xkJ__Icw',
            'title': 'Chem Player',
        },
    }]
2643
2644
class YoutubeSearchBaseInfoExtractor(YoutubePlaylistBaseInfoExtractor):
    """Shared base for search extractors; supplies the regex for video links."""
    # Captures the 11-character video id and, when a title attribute
    # follows inside the same tag, the title.
    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})(?:[^"]*"[^>]+\btitle="(?P<title>[^"]+))?'
2647
2648
class YoutubeSearchIE(SearchInfoExtractor, YoutubeSearchBaseInfoExtractor):
    """Keyword search extractor ("ytsearch") that pages through YouTube results."""
    IE_DESC = 'YouTube.com searches'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    # Extra URL query parameters merged into the search request;
    # subclasses override this to change e.g. the sort order.
    _EXTRA_QUERY_ARGS = {}
    _TESTS = []

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query.

        Downloads result pages one after another until ``n`` videos have
        been collected, a page yields no videos, or there is no "Next" link.

        :param query: the search terms.
        :param n: maximum number of results wanted; may be ``float('inf')``.
        :returns: a playlist result holding at most ``n`` entries.
        :raises ExtractorError: if a result page carries a search message
            (reported as "No video results").
        """

        videos = []

        url_query = {
            'search_query': query.encode('utf-8'),
        }
        url_query.update(self._EXTRA_QUERY_ARGS)
        result_url = 'https://www.youtube.com/results?' + compat_urllib_parse_urlencode(url_query)

        for pagenum in itertools.count(1):
            data = self._download_json(
                result_url, video_id='query "%s"' % query,
                note='Downloading page %s' % pagenum,
                errnote='Unable to download API page',
                query={'spf': 'navigate'})
            html_content = data[1]['body']['content']

            if 'class="search-message' in html_content:
                raise ExtractorError(
                    '[youtube] No video results', expected=True)

            new_videos = list(self._process_page(html_content))
            videos += new_videos
            # Stop as soon as enough results are collected. Using ">=" avoids
            # downloading one more page when exactly n results are in hand.
            if not new_videos or len(videos) >= n:
                break
            next_link = self._html_search_regex(
                r'href="(/results\?[^"]*\bsp=[^"]+)"[^>]*>\s*<span[^>]+class="[^"]*\byt-uix-button-content\b[^"]*"[^>]*>Next',
                html_content, 'next link', default=None)
            if next_link is None:
                break
            result_url = compat_urlparse.urljoin('https://www.youtube.com/', next_link)

        # Guarded rather than sliced unconditionally: n may be float('inf'),
        # which is not a valid slice bound.
        if len(videos) > n:
            videos = videos[:n]
        return self.playlist_result(videos, query)
2697
2698
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Variant of the keyword search that requests results sorted by upload date."""
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    IE_DESC = 'YouTube.com searches, newest videos first'
    _SEARCH_KEY = 'ytsearchdate'
    _EXTRA_QUERY_ARGS = {'search_sort': 'video_date_uploaded'}
2704
2705
class YoutubeSearchURLIE(YoutubeSearchBaseInfoExtractor):
    """Extract the videos listed on a YouTube ``/results?...`` search page URL."""
    IE_NAME = 'youtube:search_url'
    IE_DESC = 'YouTube.com search URLs'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Download the search page and return its videos as a playlist titled with the query."""
        raw_query = re.match(self._VALID_URL, url).group('query')
        # The query is URL-encoded in the address ('+' stands for a space).
        query = compat_urllib_parse_unquote_plus(raw_query)
        webpage = self._download_webpage(url, query)
        entries = self._process_page(webpage)
        return self.playlist_result(entries, playlist_title=query)
2726
2727
class YoutubeShowIE(YoutubePlaylistsBaseInfoExtractor):
    """Extract a show by delegating to the parent extractor on its ``/playlists`` page."""
    IE_NAME = 'youtube:show'
    IE_DESC = 'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/show/(?P<id>[^?#]*)'
    _TESTS = [{
        'url': 'https://www.youtube.com/show/airdisasters',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'airdisasters',
            'title': 'Air Disasters',
        }
    }]

    def _real_extract(self, url):
        """Rewrite the show URL to its playlists listing and let the parent class extract it."""
        show_id = self._match_id(url)
        playlists_url = 'https://www.youtube.com/show/%s/playlists' % show_id
        return super(YoutubeShowIE, self)._real_extract(playlists_url)
2745
2746
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
    """
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        """Extractor name derived from the subclass's _FEED_NAME."""
        return 'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        """Log in before any extraction takes place."""
        self._login()

    def _entries(self, page):
        """Yield one result per distinct video linked from the feed.

        Starts from ``page`` (the feed's HTML) and keeps following the
        "load more" link until a portion contributes no new video ids or
        no further link is present.
        """
        # The extraction process is the same as for playlists, but the regex
        # for the video ids doesn't contain an index
        # A set keeps the "already seen" test constant-time; the yield order
        # is still driven by the order of ids on each page.
        seen_ids = set()
        more_widget_html = content_html = page
        for page_num in itertools.count(1):
            matches = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)

            # 'recommended' feed has infinite 'load more' and each new portion spins
            # the same videos in (sometimes) slightly different order, so we'll check
            # for unicity and break when portion has no new videos
            new_ids = [
                video_id for video_id in orderedSet(matches)
                if video_id not in seen_ids]
            if not new_ids:
                break

            seen_ids.update(new_ids)

            for entry in self._ids_to_results(new_ids):
                yield entry

            mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
            if not mobj:
                break

            more = self._download_json(
                'https://youtube.com/%s' % mobj.group('more'), self._PLAYLIST_TITLE,
                'Downloading page #%s' % page_num,
                transform_source=uppercase_escape)
            content_html = more['content_html']
            more_widget_html = more['load_more_widget_html']

    def _real_extract(self, url):
        """Download the feed page for _FEED_NAME and return its videos as a playlist."""
        page = self._download_webpage(
            'https://www.youtube.com/feed/%s' % self._FEED_NAME,
            self._PLAYLIST_TITLE)
        return self.playlist_result(
            self._entries(page), playlist_title=self._PLAYLIST_TITLE)
2798
2799
class YoutubeWatchLaterIE(YoutubePlaylistIE):
    """Extractor for the "Watch later" list (playlist id ``WL``)."""
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:feed/watch_later|(?:playlist|watch)\?(?:.+&)?list=WL)|:ytwatchlater'

    _TESTS = [{
        'url': 'https://www.youtube.com/playlist?list=WL',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?v=bCNU9TrbiRk&index=1&list=WL',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the single-video result if the helper produced one, else the whole WL playlist."""
        _unused, single_video = self._check_download_just_video(url, 'WL')
        if not single_video:
            _unused, whole_playlist = self._extract_playlist('WL')
            return whole_playlist
        return single_video
2819
2820
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Resolve the favourites page to the playlist id it links to."""
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
    _LOGIN_REQUIRED = True

    def _real_extract(self, url):
        """Fetch the favourites page and hand its ``list=`` id to the playlist extractor."""
        favourites_page = self._download_webpage(
            'https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        playlist_id = self._search_regex(
            r'list=(.+?)["&]', favourites_page, 'favourites playlist id')
        return self.url_result(playlist_id, 'YoutubePlaylist')
2831
2832
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor configured for the 'recommended' feed."""
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = 'Youtube Recommended videos'
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/recommended|:ytrec(?:ommended)?'
2838
2839
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor configured for the 'subscriptions' feed."""
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = 'Youtube Subscriptions'
    IE_DESC = 'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
2845
2846
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor configured for the 'history' feed."""
    _FEED_NAME = 'history'
    _PLAYLIST_TITLE = 'Youtube History'
    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/history|:ythistory'
2852
2853
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch watch/attribution URLs that carry no video id and explain the likely cause."""
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError hinting that the URL was not quoted."""
        hint = (
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like  youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply  youtube-dl BaW_jenozKc  .')
        raise ExtractorError(hint, expected=True)
2901
2902
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch watch URLs whose ``v=`` value is 1-10 characters, i.e. shorter than a video id."""
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the incomplete id and the URL."""
        truncated_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (truncated_id, url)
        raise ExtractorError(message, expected=True)