git.bitcoin.ninja Git - youtube-dl/blob - youtube_dl/extractor/youtube.py

   1 # coding: utf-8
   2
   3 from __future__ import unicode_literals
   4
   5
   6 import itertools
   7 import json
   8 import os.path
   9 import random
  10 import re
  11 import time
  12 import traceback
  13
  14 from .common import InfoExtractor, SearchInfoExtractor
  15 from ..jsinterp import JSInterpreter
  16 from ..swfinterp import SWFInterpreter
  17 from ..compat import (
  18     compat_chr,
  19     compat_kwargs,
  20     compat_parse_qs,
  21     compat_urllib_parse_unquote,
  22     compat_urllib_parse_unquote_plus,
  23     compat_urllib_parse_urlencode,
  24     compat_urllib_parse_urlparse,
  25     compat_urlparse,
  26     compat_str,
  27 )
  28 from ..utils import (
  29     clean_html,
  30     error_to_compat_str,
  31     ExtractorError,
  32     float_or_none,
  33     get_element_by_attribute,
  34     get_element_by_id,
  35     int_or_none,
  36     mimetype2ext,
  37     orderedSet,
  38     parse_codecs,
  39     parse_duration,
  40     qualities,
  41     remove_quotes,
  42     remove_start,
  43     smuggle_url,
  44     str_or_none,
  45     str_to_int,
  46     try_get,
  47     unescapeHTML,
  48     unified_strdate,
  49     unsmuggle_url,
  50     uppercase_escape,
  51     url_or_none,
  52     urlencode_postdata,
  53 )
  54
  55
  56 class YoutubeBaseInfoExtractor(InfoExtractor):
  57     """Provide base functions for Youtube extractors"""
  58     _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
  59     _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'
  60
  61     _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
  62     _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
  63     _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'
  64
  65     _NETRC_MACHINE = 'youtube'
  66     # If True it will raise an error if no login info is provided
  67     _LOGIN_REQUIRED = False
  68
  69     _PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL|OLAK5uy_)[0-9A-Za-z-_]{10,}'
  70
  71     def _set_language(self):
  72         self._set_cookie(
  73             '.youtube.com', 'PREF', 'f1=50000000&hl=en',
  74             # YouTube sets the expire time to about two months
  75             expire_time=time.time() + 2 * 30 * 24 * 3600)
  76
  77     def _ids_to_results(self, ids):
  78         return [
  79             self.url_result(vid_id, 'Youtube', video_id=vid_id)
  80             for vid_id in ids]
  81
  82     def _login(self):
  83         """
  84         Attempt to log in to YouTube.
  85         True is returned if successful or skipped.
  86         False is returned if login failed.
  87
  88         If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
  89         """
  90         username, password = self._get_login_info()
  91         # No authentication to be performed
  92         if username is None:
  93             if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
  94                 raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
  95             return True
  96
  97         login_page = self._download_webpage(
  98             self._LOGIN_URL, None,
  99             note='Downloading login page',
 100             errnote='unable to fetch login page', fatal=False)
 101         if login_page is False:
 102             return
 103
 104         login_form = self._hidden_inputs(login_page)
 105
 106         def req(url, f_req, note, errnote):
 107             data = login_form.copy()
 108             data.update({
 109                 'pstMsg': 1,
 110                 'checkConnection': 'youtube',
 111                 'checkedDomains': 'youtube',
 112                 'hl': 'en',
 113                 'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
 114                 'f.req': json.dumps(f_req),
 115                 'flowName': 'GlifWebSignIn',
 116                 'flowEntry': 'ServiceLogin',
 117             })
 118             return self._download_json(
 119                 url, None, note=note, errnote=errnote,
 120                 transform_source=lambda s: re.sub(r'^[^[]*', '', s),
 121                 fatal=False,
 122                 data=urlencode_postdata(data), headers={
 123                     'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
 124                     'Google-Accounts-XSRF': 1,
 125                 })
 126
 127         def warn(message):
 128             self._downloader.report_warning(message)
 129
 130         lookup_req = [
 131             username,
 132             None, [], None, 'US', None, None, 2, False, True,
 133             [
 134                 None, None,
 135                 [2, 1, None, 1,
 136                  'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
 137                  None, [], 4],
 138                 1, [None, None, []], None, None, None, True
 139             ],
 140             username,
 141         ]
 142
 143         lookup_results = req(
 144             self._LOOKUP_URL, lookup_req,
 145             'Looking up account info', 'Unable to look up account info')
 146
 147         if lookup_results is False:
 148             return False
 149
 150         user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
 151         if not user_hash:
 152             warn('Unable to extract user hash')
 153             return False
 154
 155         challenge_req = [
 156             user_hash,
 157             None, 1, None, [1, None, None, None, [password, None, True]],
 158             [
 159                 None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
 160                 1, [None, None, []], None, None, None, True
 161             ]]
 162
 163         challenge_results = req(
 164             self._CHALLENGE_URL, challenge_req,
 165             'Logging in', 'Unable to log in')
 166
 167         if challenge_results is False:
 168             return
 169
 170         login_res = try_get(challenge_results, lambda x: x[0][5], list)
 171         if login_res:
 172             login_msg = try_get(login_res, lambda x: x[5], compat_str)
 173             warn(
 174                 'Unable to login: %s' % 'Invalid password'
 175                 if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
 176             return False
 177
 178         res = try_get(challenge_results, lambda x: x[0][-1], list)
 179         if not res:
 180             warn('Unable to extract result entry')
 181             return False
 182
 183         login_challenge = try_get(res, lambda x: x[0][0], list)
 184         if login_challenge:
 185             challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
 186             if challenge_str == 'TWO_STEP_VERIFICATION':
 187                 # SEND_SUCCESS - TFA code has been successfully sent to phone
 188                 # QUOTA_EXCEEDED - reached the limit of TFA codes
 189                 status = try_get(login_challenge, lambda x: x[5], compat_str)
 190                 if status == 'QUOTA_EXCEEDED':
 191                     warn('Exceeded the limit of TFA codes, try later')
 192                     return False
 193
 194                 tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
 195                 if not tl:
 196                     warn('Unable to extract TL')
 197                     return False
 198
 199                 tfa_code = self._get_tfa_info('2-step verification code')
 200
 201                 if not tfa_code:
 202                     warn(
 203                         'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
 204                         '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
 205                     return False
 206
 207                 tfa_code = remove_start(tfa_code, 'G-')
 208
 209                 tfa_req = [
 210                     user_hash, None, 2, None,
 211                     [
 212                         9, None, None, None, None, None, None, None,
 213                         [None, tfa_code, True, 2]
 214                     ]]
 215
 216                 tfa_results = req(
 217                     self._TFA_URL.format(tl), tfa_req,
 218                     'Submitting TFA code', 'Unable to submit TFA code')
 219
 220                 if tfa_results is False:
 221                     return False
 222
 223                 tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
 224                 if tfa_res:
 225                     tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
 226                     warn(
 227                         'Unable to finish TFA: %s' % 'Invalid TFA code'
 228                         if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
 229                     return False
 230
 231                 check_cookie_url = try_get(
 232                     tfa_results, lambda x: x[0][-1][2], compat_str)
 233             else:
 234                 CHALLENGES = {
 235                     'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
 236                     'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
 237                     'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
 238                 }
 239                 challenge = CHALLENGES.get(
 240                     challenge_str,
 241                     '%s returned error %s.' % (self.IE_NAME, challenge_str))
 242                 warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
 243                 return False
 244         else:
 245             check_cookie_url = try_get(res, lambda x: x[2], compat_str)
 246
 247         if not check_cookie_url:
 248             warn('Unable to extract CheckCookie URL')
 249             return False
 250
 251         check_cookie_results = self._download_webpage(
 252             check_cookie_url, None, 'Checking cookie', fatal=False)
 253
 254         if check_cookie_results is False:
 255             return False
 256
 257         if 'https://myaccount.google.com/' not in check_cookie_results:
 258             warn('Unable to log in')
 259             return False
 260
 261         return True
 262
 263     def _download_webpage_handle(self, *args, **kwargs):
 264         query = kwargs.get('query', {}).copy()
 265         query['disable_polymer'] = 'true'
 266         kwargs['query'] = query
 267         return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
 268             *args, **compat_kwargs(kwargs))
 269
 270     def _real_initialize(self):
 271         if self._downloader is None:
 272             return
 273         self._set_language()
 274         if not self._login():
 275             return
 276
 277
 278 class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
 279     # Extract entries from page with "Load more" button
 280     def _entries(self, page, playlist_id):
 281         more_widget_html = content_html = page
 282         for page_num in itertools.count(1):
 283             for entry in self._process_page(content_html):
 284                 yield entry
 285
 286             mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
 287             if not mobj:
 288                 break
 289
 290             more = self._download_json(
 291                 'https://youtube.com/%s' % mobj.group('more'), playlist_id,
 292                 'Downloading page #%s' % page_num,
 293                 transform_source=uppercase_escape)
 294             content_html = more['content_html']
 295             if not content_html.strip():
 296                 # Some webpages show a "Load more" button but they don't
 297                 # have more videos
 298                 break
 299             more_widget_html = more['load_more_widget_html']
 300
 301
 302 class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
 303     def _process_page(self, content):
 304         for video_id, video_title in self.extract_videos_from_page(content):
 305             yield self.url_result(video_id, 'Youtube', video_id, video_title)
 306
 307     def extract_videos_from_page(self, page):
 308         ids_in_page = []
 309         titles_in_page = []
 310         for mobj in re.finditer(self._VIDEO_RE, page):
 311             # The link with index 0 is not the first video of the playlist (not sure if still actual)
 312             if 'index' in mobj.groupdict() and mobj.group('id') == '0':
 313                 continue
 314             video_id = mobj.group('id')
 315             video_title = unescapeHTML(mobj.group('title'))
 316             if video_title:
 317                 video_title = video_title.strip()
 318             try:
 319                 idx = ids_in_page.index(video_id)
 320                 if video_title and not titles_in_page[idx]:
 321                     titles_in_page[idx] = video_title
 322             except ValueError:
 323                 ids_in_page.append(video_id)
 324                 titles_in_page.append(video_title)
 325         return zip(ids_in_page, titles_in_page)
 326
 327
 328 class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
 329     def _process_page(self, content):
 330         for playlist_id in orderedSet(re.findall(
 331                 r'<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*><a[^>]+href="/?playlist\?list=([0-9A-Za-z-_]{10,})"',
 332                 content)):
 333             yield self.url_result(
 334                 'https://www.youtube.com/playlist?list=%s' % playlist_id, 'YoutubePlaylist')
 335
 336     def _real_extract(self, url):
 337         playlist_id = self._match_id(url)
 338         webpage = self._download_webpage(url, playlist_id)
 339         title = self._og_search_title(webpage, fatal=False)
 340         return self.playlist_result(self._entries(webpage, playlist_id), playlist_id, title)
 341
 342
 343 class YoutubeIE(YoutubeBaseInfoExtractor):
 344     IE_DESC = 'YouTube.com'
 345     _VALID_URL = r"""(?x)^
 346                      (
 347                          (?:https?://|//)                                    # http(s):// or protocol-independent URL
 348                          (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|
 349                             (?:www\.)?deturl\.com/www\.youtube\.com/|
 350                             (?:www\.)?pwnyoutube\.com/|
 351                             (?:www\.)?hooktube\.com/|
 352                             (?:www\.)?yourepeat\.com/|
 353                             tube\.majestyc\.net/|
 354                             (?:www\.)?invidio\.us/|
 355                             (?:www\.)?invidious\.snopyta\.org/|
 356                             (?:www\.)?invidious\.kabi\.tk/|
 357                             (?:www\.)?vid\.wxzm\.sx/|
 358                             youtube\.googleapis\.com/)                        # the various hostnames, with wildcard subdomains
 359                          (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
 360                          (?:                                                  # the various things that can precede the ID:
 361                              (?:(?:v|embed|e)/(?!videoseries))                # v/ or embed/ or e/
 362                              |(?:                                             # or the v= param in all its forms
 363                                  (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)?  # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
 364                                  (?:\?|\#!?)                                  # the params delimiter ? or # or #!
 365                                  (?:.*?[&;])??                                # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
 366                                  v=
 367                              )
 368                          ))
 369                          |(?:
 370                             youtu\.be|                                        # just youtu.be/xxxx
 371                             vid\.plus|                                        # or vid.plus/xxxx
 372                             zwearz\.com/watch|                                # or zwearz.com/watch/xxxx
 373                          )/
 374                          |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
 375                          )
 376                      )?                                                       # all until now is optional -> you can pass the naked ID
 377                      ([0-9A-Za-z_-]{11})                                      # here is it! the YouTube video ID
 378                      (?!.*?\blist=
 379                         (?:
 380                             %(playlist_id)s|                                  # combined list/video URLs are handled by the playlist IE
 381                             WL                                                # WL are handled by the watch later IE
 382                         )
 383                      )
 384                      (?(1).+)?                                                # if we found the ID, everything can follow
 385                      $""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
 386     _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
 387     _formats = {
 388         '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
 389         '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
 390         '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
 391         '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
 392         '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
 393         '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
 394         '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 395         '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 396         # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
 397         '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
 398         '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
 399         '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
 400         '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
 401         '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
 402         '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
 403         '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
 404         '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 405         '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 406
 407
 408         # 3D videos
 409         '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
 410         '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
 411         '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
 412         '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
 413         '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
 414         '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
 415         '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
 416
 417         # Apple HTTP Live Streaming
 418         '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
 419         '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
 420         '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
 421         '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
 422         '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
 423         '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
 424         '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
 425         '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
 426
 427         # DASH mp4 video
 428         '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
 429         '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
 430         '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
 431         '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
 432         '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
 433         '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'},  # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
 434         '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
 435         '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
 436         '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
 437         '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
 438         '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
 439         '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
 440
 441         # Dash mp4 audio
 442         '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
 443         '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
 444         '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
 445         '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
 446         '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
 447         '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
 448         '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
 449
 450         # Dash webm
 451         '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 452         '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 453         '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 454         '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 455         '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 456         '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 457         '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
 458         '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 459         '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 460         '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 461         '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 462         '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 463         '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 464         '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 465         '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 466         # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
 467         '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 468         '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
 469         '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
 470         '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
 471         '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 472         '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
 473
 474         # Dash webm audio
 475         '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
 476         '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
 477
 478         # Dash webm audio with opus inside
 479         '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
 480         '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
 481         '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
 482
 483         # RTMP (unnamed)
 484         '_rtmp': {'protocol': 'rtmp'},
 485     }
 486     _SUBTITLE_FORMATS = ('ttml', 'vtt')
 487
 488     _GEO_BYPASS = False
 489
 490     IE_NAME = 'youtube'
 491     _TESTS = [
 492         {
 493             'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
 494             'info_dict': {
 495                 'id': 'BaW_jenozKc',
 496                 'ext': 'mp4',
 497                 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
 498                 'uploader': 'Philipp Hagemeister',
 499                 'uploader_id': 'phihag',
 500                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
 501                 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
 502                 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
 503                 'upload_date': '20121002',
 504                 'description': 'test chars:  "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
 505                 'categories': ['Science & Technology'],
 506                 'tags': ['youtube-dl'],
 507                 'duration': 10,
 508                 'view_count': int,
 509                 'like_count': int,
 510                 'dislike_count': int,
 511                 'start_time': 1,
 512                 'end_time': 9,
 513             }
 514         },
 515         {
 516             'url': 'https://www.youtube.com/watch?v=UxxajLWwzqY',
 517             'note': 'Test generic use_cipher_signature video (#897)',
 518             'info_dict': {
 519                 'id': 'UxxajLWwzqY',
 520                 'ext': 'mp4',
 521                 'upload_date': '20120506',
 522                 'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
 523                 'alt_title': 'I Love It (feat. Charli XCX)',
 524                 'description': 'md5:f3ceb5ef83a08d95b9d146f973157cc8',
 525                 'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
 526                          'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
 527                          'iconic ep', 'iconic', 'love', 'it'],
 528                 'duration': 180,
 529                 'uploader': 'Icona Pop',
 530                 'uploader_id': 'IconaPop',
 531                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IconaPop',
 532                 'creator': 'Icona Pop',
 533                 'track': 'I Love It (feat. Charli XCX)',
 534                 'artist': 'Icona Pop',
 535             }
 536         },
 537         {
 538             'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ',
 539             'note': 'Test VEVO video with age protection (#956)',
 540             'info_dict': {
 541                 'id': '07FYdnEawAQ',
 542                 'ext': 'mp4',
 543                 'upload_date': '20130703',
 544                 'title': 'Justin Timberlake - Tunnel Vision (Official Music Video) (Explicit)',
 545                 'alt_title': 'Tunnel Vision',
 546                 'description': 'md5:07dab3356cde4199048e4c7cd93471e1',
 547                 'duration': 419,
 548                 'uploader': 'justintimberlakeVEVO',
 549                 'uploader_id': 'justintimberlakeVEVO',
 550                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/justintimberlakeVEVO',
 551                 'creator': 'Justin Timberlake',
 552                 'track': 'Tunnel Vision',
 553                 'artist': 'Justin Timberlake',
 554                 'age_limit': 18,
 555             }
 556         },
 557         {
 558             'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
 559             'note': 'Embed-only video (#1746)',
 560             'info_dict': {
 561                 'id': 'yZIXLfi8CZQ',
 562                 'ext': 'mp4',
 563                 'upload_date': '20120608',
 564                 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
 565                 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
 566                 'uploader': 'SET India',
 567                 'uploader_id': 'setindia',
 568                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
 569                 'age_limit': 18,
 570             }
 571         },
 572         {
 573             'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=UxxajLWwzqY',
 574             'note': 'Use the first video ID in the URL',
 575             'info_dict': {
 576                 'id': 'BaW_jenozKc',
 577                 'ext': 'mp4',
 578                 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
 579                 'uploader': 'Philipp Hagemeister',
 580                 'uploader_id': 'phihag',
 581                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
 582                 'upload_date': '20121002',
 583                 'description': 'test chars:  "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
 584                 'categories': ['Science & Technology'],
 585                 'tags': ['youtube-dl'],
 586                 'duration': 10,
 587                 'view_count': int,
 588                 'like_count': int,
 589                 'dislike_count': int,
 590             },
 591             'params': {
 592                 'skip_download': True,
 593             },
 594         },
 595         {
 596             'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
 597             'note': '256k DASH audio (format 141) via DASH manifest',
 598             'info_dict': {
 599                 'id': 'a9LDPn-MO4I',
 600                 'ext': 'm4a',
 601                 'upload_date': '20121002',
 602                 'uploader_id': '8KVIDEO',
 603                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
 604                 'description': '',
 605                 'uploader': '8KVIDEO',
 606                 'title': 'UHDTV TEST 8K VIDEO.mp4'
 607             },
 608             'params': {
 609                 'youtube_include_dash_manifest': True,
 610                 'format': '141',
 611             },
 612             'skip': 'format 141 not served anymore',
 613         },
 614         # DASH manifest with encrypted signature
 615         {
 616             'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
 617             'info_dict': {
 618                 'id': 'IB3lcPjvWLA',
 619                 'ext': 'm4a',
 620                 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
 621                 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
 622                 'duration': 244,
 623                 'uploader': 'AfrojackVEVO',
 624                 'uploader_id': 'AfrojackVEVO',
 625                 'upload_date': '20131011',
 626             },
 627             'params': {
 628                 'youtube_include_dash_manifest': True,
 629                 'format': '141/bestaudio[ext=m4a]',
 630             },
 631         },
 632         # JS player signature function name containing $
 633         {
 634             'url': 'https://www.youtube.com/watch?v=nfWlot6h_JM',
 635             'info_dict': {
 636                 'id': 'nfWlot6h_JM',
 637                 'ext': 'm4a',
 638                 'title': 'Taylor Swift - Shake It Off',
 639                 'description': 'md5:bec2185232c05479482cb5a9b82719bf',
 640                 'duration': 242,
 641                 'uploader': 'TaylorSwiftVEVO',
 642                 'uploader_id': 'TaylorSwiftVEVO',
 643                 'upload_date': '20140818',
 644                 'creator': 'Taylor Swift',
 645             },
 646             'params': {
 647                 'youtube_include_dash_manifest': True,
 648                 'format': '141/bestaudio[ext=m4a]',
 649             },
 650         },
 651         # Controversy video
 652         {
 653             'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
 654             'info_dict': {
 655                 'id': 'T4XJQO3qol8',
 656                 'ext': 'mp4',
 657                 'duration': 219,
 658                 'upload_date': '20100909',
 659                 'uploader': 'Amazing Atheist',
 660                 'uploader_id': 'TheAmazingAtheist',
 661                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
 662                 'title': 'Burning Everyone\'s Koran',
 663                 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
 664             }
 665         },
 666         # Normal age-gate video (No vevo, embed allowed)
 667         {
 668             'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
 669             'info_dict': {
 670                 'id': 'HtVdAasjOgU',
 671                 'ext': 'mp4',
 672                 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
 673                 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
 674                 'duration': 142,
 675                 'uploader': 'The Witcher',
 676                 'uploader_id': 'WitcherGame',
 677                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
 678                 'upload_date': '20140605',
 679                 'age_limit': 18,
 680             },
 681         },
 682         # Age-gate video with encrypted signature
 683         {
 684             'url': 'https://www.youtube.com/watch?v=6kLq3WMV1nU',
 685             'info_dict': {
 686                 'id': '6kLq3WMV1nU',
 687                 'ext': 'mp4',
 688                 'title': 'Dedication To My Ex (Miss That) (Lyric Video)',
 689                 'description': 'md5:33765bb339e1b47e7e72b5490139bb41',
 690                 'duration': 246,
 691                 'uploader': 'LloydVEVO',
 692                 'uploader_id': 'LloydVEVO',
 693                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/LloydVEVO',
 694                 'upload_date': '20110629',
 695                 'age_limit': 18,
 696             },
 697         },
 698         # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
 699         # YouTube Red ad is not captured for creator
 700         {
 701             'url': '__2ABJjxzNo',
 702             'info_dict': {
 703                 'id': '__2ABJjxzNo',
 704                 'ext': 'mp4',
 705                 'duration': 266,
 706                 'upload_date': '20100430',
 707                 'uploader_id': 'deadmau5',
 708                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
 709                 'creator': 'deadmau5',
 710                 'description': 'md5:12c56784b8032162bb936a5f76d55360',
 711                 'uploader': 'deadmau5',
 712                 'title': 'Deadmau5 - Some Chords (HD)',
 713                 'alt_title': 'Some Chords',
 714             },
 715             'expected_warnings': [
 716                 'DASH manifest missing',
 717             ]
 718         },
 719         # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
 720         {
 721             'url': 'lqQg6PlCWgI',
 722             'info_dict': {
 723                 'id': 'lqQg6PlCWgI',
 724                 'ext': 'mp4',
 725                 'duration': 6085,
 726                 'upload_date': '20150827',
 727                 'uploader_id': 'olympic',
 728                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
 729                 'description': 'HO09  - Women -  GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
 730                 'uploader': 'Olympic',
 731                 'title': 'Hockey - Women -  GER-AUS - London 2012 Olympic Games',
 732             },
 733             'params': {
 734                 'skip_download': 'requires avconv',
 735             }
 736         },
 737         # Non-square pixels
 738         {
 739             'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
 740             'info_dict': {
 741                 'id': '_b-2C3KPAM0',
 742                 'ext': 'mp4',
 743                 'stretched_ratio': 16 / 9.,
 744                 'duration': 85,
 745                 'upload_date': '20110310',
 746                 'uploader_id': 'AllenMeow',
 747                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
 748                 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
 749                 'uploader': '孫ᄋᄅ',
 750                 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
 751             },
 752         },
 753         # url_encoded_fmt_stream_map is empty string
 754         {
 755             'url': 'qEJwOuvDf7I',
 756             'info_dict': {
 757                 'id': 'qEJwOuvDf7I',
 758                 'ext': 'webm',
 759                 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
 760                 'description': '',
 761                 'upload_date': '20150404',
 762                 'uploader_id': 'spbelect',
 763                 'uploader': 'Наблюдатели Петербурга',
 764             },
 765             'params': {
 766                 'skip_download': 'requires avconv',
 767             },
 768             'skip': 'This live event has ended.',
 769         },
 770         # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
 771         {
 772             'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
 773             'info_dict': {
 774                 'id': 'FIl7x6_3R5Y',
 775                 'ext': 'webm',
 776                 'title': 'md5:7b81415841e02ecd4313668cde88737a',
 777                 'description': 'md5:116377fd2963b81ec4ce64b542173306',
 778                 'duration': 220,
 779                 'upload_date': '20150625',
 780                 'uploader_id': 'dorappi2000',
 781                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
 782                 'uploader': 'dorappi2000',
 783                 'formats': 'mincount:31',
 784             },
 785             'skip': 'not actual anymore',
 786         },
 787         # DASH manifest with segment_list
 788         {
 789             'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
 790             'md5': '8ce563a1d667b599d21064e982ab9e31',
 791             'info_dict': {
 792                 'id': 'CsmdDsKjzN8',
 793                 'ext': 'mp4',
 794                 'upload_date': '20150501',  # According to '<meta itemprop="datePublished"', but in other places it's 20150510
 795                 'uploader': 'Airtek',
 796                 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
 797                 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
 798                 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
 799             },
 800             'params': {
 801                 'youtube_include_dash_manifest': True,
 802                 'format': '135',  # bestvideo
 803             },
 804             'skip': 'This live event has ended.',
 805         },
 806         {
 807             # Multifeed videos (multiple cameras), URL is for Main Camera
 808             'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
 809             'info_dict': {
 810                 'id': 'jqWvoWXjCVs',
 811                 'title': 'teamPGP: Rocket League Noob Stream',
 812                 'description': 'md5:dc7872fb300e143831327f1bae3af010',
 813             },
 814             'playlist': [{
 815                 'info_dict': {
 816                     'id': 'jqWvoWXjCVs',
 817                     'ext': 'mp4',
 818                     'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
 819                     'description': 'md5:dc7872fb300e143831327f1bae3af010',
 820                     'duration': 7335,
 821                     'upload_date': '20150721',
 822                     'uploader': 'Beer Games Beer',
 823                     'uploader_id': 'beergamesbeer',
 824                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
 825                     'license': 'Standard YouTube License',
 826                 },
 827             }, {
 828                 'info_dict': {
 829                     'id': '6h8e8xoXJzg',
 830                     'ext': 'mp4',
 831                     'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
 832                     'description': 'md5:dc7872fb300e143831327f1bae3af010',
 833                     'duration': 7337,
 834                     'upload_date': '20150721',
 835                     'uploader': 'Beer Games Beer',
 836                     'uploader_id': 'beergamesbeer',
 837                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
 838                     'license': 'Standard YouTube License',
 839                 },
 840             }, {
 841                 'info_dict': {
 842                     'id': 'PUOgX5z9xZw',
 843                     'ext': 'mp4',
 844                     'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
 845                     'description': 'md5:dc7872fb300e143831327f1bae3af010',
 846                     'duration': 7337,
 847                     'upload_date': '20150721',
 848                     'uploader': 'Beer Games Beer',
 849                     'uploader_id': 'beergamesbeer',
 850                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
 851                     'license': 'Standard YouTube License',
 852                 },
 853             }, {
 854                 'info_dict': {
 855                     'id': 'teuwxikvS5k',
 856                     'ext': 'mp4',
 857                     'title': 'teamPGP: Rocket League Noob Stream (zim)',
 858                     'description': 'md5:dc7872fb300e143831327f1bae3af010',
 859                     'duration': 7334,
 860                     'upload_date': '20150721',
 861                     'uploader': 'Beer Games Beer',
 862                     'uploader_id': 'beergamesbeer',
 863                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
 864                     'license': 'Standard YouTube License',
 865                 },
 866             }],
 867             'params': {
 868                 'skip_download': True,
 869             },
 870             'skip': 'This video is not available.',
 871         },
 872         {
 873             # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
 874             'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
 875             'info_dict': {
 876                 'id': 'gVfLd0zydlo',
 877                 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
 878             },
 879             'playlist_count': 2,
 880             'skip': 'Not multifeed anymore',
 881         },
 882         {
 883             'url': 'https://vid.plus/FlRa-iH7PGw',
 884             'only_matching': True,
 885         },
 886         {
 887             'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
 888             'only_matching': True,
 889         },
 890         {
 891             # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
 892             # Also tests cut-off URL expansion in video description (see
 893             # https://github.com/ytdl-org/youtube-dl/issues/1892,
 894             # https://github.com/ytdl-org/youtube-dl/issues/8164)
 895             'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
 896             'info_dict': {
 897                 'id': 'lsguqyKfVQg',
 898                 'ext': 'mp4',
 899                 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
 900                 'alt_title': 'Dark Walk - Position Music',
 901                 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
 902                 'duration': 133,
 903                 'upload_date': '20151119',
 904                 'uploader_id': 'IronSoulElf',
 905                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
 906                 'uploader': 'IronSoulElf',
 907                 'creator': 'Todd Haberman,  Daniel Law Heath and Aaron Kaplan',
 908                 'track': 'Dark Walk - Position Music',
 909                 'artist': 'Todd Haberman,  Daniel Law Heath and Aaron Kaplan',
 910             },
 911             'params': {
 912                 'skip_download': True,
 913             },
 914         },
 915         {
 916             # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
 917             'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
 918             'only_matching': True,
 919         },
 920         {
 921             # Video with yt:stretch=17:0
 922             'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
 923             'info_dict': {
 924                 'id': 'Q39EVAstoRM',
 925                 'ext': 'mp4',
 926                 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
 927                 'description': 'md5:ee18a25c350637c8faff806845bddee9',
 928                 'upload_date': '20151107',
 929                 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
 930                 'uploader': 'CH GAMER DROID',
 931             },
 932             'params': {
 933                 'skip_download': True,
 934             },
 935             'skip': 'This video does not exist.',
 936         },
 937         {
 938             # Video licensed under Creative Commons
 939             'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
 940             'info_dict': {
 941                 'id': 'M4gD1WSo5mA',
 942                 'ext': 'mp4',
 943                 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
 944                 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
 945                 'duration': 721,
 946                 'upload_date': '20150127',
 947                 'uploader_id': 'BerkmanCenter',
 948                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
 949                 'uploader': 'The Berkman Klein Center for Internet & Society',
 950                 'license': 'Creative Commons Attribution license (reuse allowed)',
 951             },
 952             'params': {
 953                 'skip_download': True,
 954             },
 955         },
 956         {
 957             # Channel-like uploader_url
 958             'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
 959             'info_dict': {
 960                 'id': 'eQcmzGIKrzg',
 961                 'ext': 'mp4',
 962                 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
 963                 'description': 'md5:dda0d780d5a6e120758d1711d062a867',
 964                 'duration': 4060,
 965                 'upload_date': '20151119',
 966                 'uploader': 'Bernie Sanders',
 967                 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
 968                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
 969                 'license': 'Creative Commons Attribution license (reuse allowed)',
 970             },
 971             'params': {
 972                 'skip_download': True,
 973             },
 974         },
 975         {
 976             'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
 977             'only_matching': True,
 978         },
 979         {
 980             # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
 981             'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
 982             'only_matching': True,
 983         },
 984         {
 985             # Rental video preview
 986             'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
 987             'info_dict': {
 988                 'id': 'uGpuVWrhIzE',
 989                 'ext': 'mp4',
 990                 'title': 'Piku - Trailer',
 991                 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
 992                 'upload_date': '20150811',
 993                 'uploader': 'FlixMatrix',
 994                 'uploader_id': 'FlixMatrixKaravan',
 995                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
 996                 'license': 'Standard YouTube License',
 997             },
 998             'params': {
 999                 'skip_download': True,
1000             },
1001             'skip': 'This video is not available.',
1002         },
1003         {
1004             # YouTube Red video with episode data
1005             'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1006             'info_dict': {
1007                 'id': 'iqKdEhx-dD4',
1008                 'ext': 'mp4',
1009                 'title': 'Isolation - Mind Field (Ep 1)',
1010                 'description': 'md5:46a29be4ceffa65b92d277b93f463c0f',
1011                 'duration': 2085,
1012                 'upload_date': '20170118',
1013                 'uploader': 'Vsauce',
1014                 'uploader_id': 'Vsauce',
1015                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
1016                 'series': 'Mind Field',
1017                 'season_number': 1,
1018                 'episode_number': 1,
1019             },
1020             'params': {
1021                 'skip_download': True,
1022             },
1023             'expected_warnings': [
1024                 'Skipping DASH manifest',
1025             ],
1026         },
1027         {
1028             # The following content has been identified by the YouTube community
1029             # as inappropriate or offensive to some audiences.
1030             'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1031             'info_dict': {
1032                 'id': '6SJNVb0GnPI',
1033                 'ext': 'mp4',
1034                 'title': 'Race Differences in Intelligence',
1035                 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1036                 'duration': 965,
1037                 'upload_date': '20140124',
1038                 'uploader': 'New Century Foundation',
1039                 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1040                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1041             },
1042             'params': {
1043                 'skip_download': True,
1044             },
1045         },
1046         {
1047             # itag 212
1048             'url': '1t24XAntNCY',
1049             'only_matching': True,
1050         },
1051         {
1052             # geo restricted to JP
1053             'url': 'sJL6WA-aGkQ',
1054             'only_matching': True,
1055         },
1056         {
1057             'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
1058             'only_matching': True,
1059         },
1060         {
1061             'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1062             'only_matching': True,
1063         },
1064         {
1065             # DRM protected
1066             'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1067             'only_matching': True,
1068         },
1069         {
1070             # Video with unsupported adaptive stream type formats
1071             'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1072             'info_dict': {
1073                 'id': 'Z4Vy8R84T1U',
1074                 'ext': 'mp4',
1075                 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1076                 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1077                 'duration': 433,
1078                 'upload_date': '20130923',
1079                 'uploader': 'Amelia Putri Harwita',
1080                 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1081                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1082                 'formats': 'maxcount:10',
1083             },
1084             'params': {
1085                 'skip_download': True,
1086                 'youtube_include_dash_manifest': False,
1087             },
1088         }
1089     ]
1090
1091     def __init__(self, *args, **kwargs):
1092         super(YoutubeIE, self).__init__(*args, **kwargs)
1093         self._player_cache = {}
1094
1095     def report_video_info_webpage_download(self, video_id):
1096         """Report attempt to download video info webpage."""
1097         self.to_screen('%s: Downloading video info webpage' % video_id)
1098
1099     def report_information_extraction(self, video_id):
1100         """Report attempt to extract video information."""
1101         self.to_screen('%s: Extracting video information' % video_id)
1102
1103     def report_unavailable_format(self, video_id, format):
1104         """Report extracted video URL."""
1105         self.to_screen('%s: Format %s not available' % (video_id, format))
1106
1107     def report_rtmp_download(self):
1108         """Indicate the download will use the RTMP protocol."""
1109         self.to_screen('RTMP download detected')
1110
1111     def _signature_cache_id(self, example_sig):
1112         """ Return a string representation of a signature """
1113         return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
1114
1115     def _extract_signature_function(self, video_id, player_url, example_sig):
1116         id_m = re.match(
1117             r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|(?:/[a-z]{2,3}_[A-Z]{2})?/base)?\.(?P<ext>[a-z]+)$',
1118             player_url)
1119         if not id_m:
1120             raise ExtractorError('Cannot identify player %r' % player_url)
1121         player_type = id_m.group('ext')
1122         player_id = id_m.group('id')
1123
1124         # Read from filesystem cache
1125         func_id = '%s_%s_%s' % (
1126             player_type, player_id, self._signature_cache_id(example_sig))
1127         assert os.path.basename(func_id) == func_id
1128
1129         cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
1130         if cache_spec is not None:
1131             return lambda s: ''.join(s[i] for i in cache_spec)
1132
1133         download_note = (
1134             'Downloading player %s' % player_url
1135             if self._downloader.params.get('verbose') else
1136             'Downloading %s player %s' % (player_type, player_id)
1137         )
1138         if player_type == 'js':
1139             code = self._download_webpage(
1140                 player_url, video_id,
1141                 note=download_note,
1142                 errnote='Download of %s failed' % player_url)
1143             res = self._parse_sig_js(code)
1144         elif player_type == 'swf':
1145             urlh = self._request_webpage(
1146                 player_url, video_id,
1147                 note=download_note,
1148                 errnote='Download of %s failed' % player_url)
1149             code = urlh.read()
1150             res = self._parse_sig_swf(code)
1151         else:
1152             assert False, 'Invalid player type %r' % player_type
1153
1154         test_string = ''.join(map(compat_chr, range(len(example_sig))))
1155         cache_res = res(test_string)
1156         cache_spec = [ord(c) for c in cache_res]
1157
1158         self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
1159         return res
1160
1161     def _print_sig_code(self, func, example_sig):
1162         def gen_sig_code(idxs):
1163             def _genslice(start, end, step):
1164                 starts = '' if start == 0 else str(start)
1165                 ends = (':%d' % (end + step)) if end + step >= 0 else ':'
1166                 steps = '' if step == 1 else (':%d' % step)
1167                 return 's[%s%s%s]' % (starts, ends, steps)
1168
1169             step = None
1170             # Quelch pyflakes warnings - start will be set when step is set
1171             start = '(Never used)'
1172             for i, prev in zip(idxs[1:], idxs[:-1]):
1173                 if step is not None:
1174                     if i - prev == step:
1175                         continue
1176                     yield _genslice(start, prev, step)
1177                     step = None
1178                     continue
1179                 if i - prev in [-1, 1]:
1180                     step = i - prev
1181                     start = prev
1182                     continue
1183                 else:
1184                     yield 's[%d]' % prev
1185             if step is None:
1186                 yield 's[%d]' % i
1187             else:
1188                 yield _genslice(start, i, step)
1189
1190         test_string = ''.join(map(compat_chr, range(len(example_sig))))
1191         cache_res = func(test_string)
1192         cache_spec = [ord(c) for c in cache_res]
1193         expr_code = ' + '.join(gen_sig_code(cache_spec))
1194         signature_id_tuple = '(%s)' % (
1195             ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
1196         code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
1197                 '    return %s\n') % (signature_id_tuple, expr_code)
1198         self.to_screen('Extracted signature function:\n' + code)
1199
1200     def _parse_sig_js(self, jscode):
1201         funcname = self._search_regex(
1202             (r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1203              r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
1204              r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*c\s*&&\s*d\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?(?P<sig>[a-zA-Z0-9$]+)\(',
1205              r'\bc\s*&&\s*d\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1206              r'\bc\s*&&\s*d\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
1207             jscode, 'Initial JS player signature function name', group='sig')
1208
1209         jsi = JSInterpreter(jscode)
1210         initial_function = jsi.extract_function(funcname)
1211         return lambda s: initial_function([s])
1212
1213     def _parse_sig_swf(self, file_contents):
1214         swfi = SWFInterpreter(file_contents)
1215         TARGET_CLASSNAME = 'SignatureDecipher'
1216         searched_class = swfi.extract_class(TARGET_CLASSNAME)
1217         initial_function = swfi.extract_function(searched_class, 'decipher')
1218         return lambda s: initial_function([s])
1219
1220     def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
1221         """Turn the encrypted s field into a working signature"""
1222
1223         if player_url is None:
1224             raise ExtractorError('Cannot decrypt signature without player_url')
1225
1226         if player_url.startswith('//'):
1227             player_url = 'https:' + player_url
1228         elif not re.match(r'https?://', player_url):
1229             player_url = compat_urlparse.urljoin(
1230                 'https://www.youtube.com', player_url)
1231         try:
1232             player_id = (player_url, self._signature_cache_id(s))
1233             if player_id not in self._player_cache:
1234                 func = self._extract_signature_function(
1235                     video_id, player_url, s
1236                 )
1237                 self._player_cache[player_id] = func
1238             func = self._player_cache[player_id]
1239             if self._downloader.params.get('youtube_print_sig_code'):
1240                 self._print_sig_code(func, s)
1241             return func(s)
1242         except Exception as e:
1243             tb = traceback.format_exc()
1244             raise ExtractorError(
1245                 'Signature extraction failed: ' + tb, cause=e)
1246
1247     def _get_subtitles(self, video_id, webpage):
1248         try:
1249             subs_doc = self._download_xml(
1250                 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
1251                 video_id, note=False)
1252         except ExtractorError as err:
1253             self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err))
1254             return {}
1255
1256         sub_lang_list = {}
1257         for track in subs_doc.findall('track'):
1258             lang = track.attrib['lang_code']
1259             if lang in sub_lang_list:
1260                 continue
1261             sub_formats = []
1262             for ext in self._SUBTITLE_FORMATS:
1263                 params = compat_urllib_parse_urlencode({
1264                     'lang': lang,
1265                     'v': video_id,
1266                     'fmt': ext,
1267                     'name': track.attrib['name'].encode('utf-8'),
1268                 })
1269                 sub_formats.append({
1270                     'url': 'https://www.youtube.com/api/timedtext?' + params,
1271                     'ext': ext,
1272                 })
1273             sub_lang_list[lang] = sub_formats
1274         if not sub_lang_list:
1275             self._downloader.report_warning('video doesn\'t have subtitles')
1276             return {}
1277         return sub_lang_list
1278
1279     def _get_ytplayer_config(self, video_id, webpage):
1280         patterns = (
1281             # User data may contain arbitrary character sequences that may affect
1282             # JSON extraction with regex, e.g. when '};' is contained the second
1283             # regex won't capture the whole JSON. Yet working around by trying more
1284             # concrete regex first keeping in mind proper quoted string handling
1285             # to be implemented in future that will replace this workaround (see
1286             # https://github.com/ytdl-org/youtube-dl/issues/7468,
1287             # https://github.com/ytdl-org/youtube-dl/pull/7599)
1288             r';ytplayer\.config\s*=\s*({.+?});ytplayer',
1289             r';ytplayer\.config\s*=\s*({.+?});',
1290         )
1291         config = self._search_regex(
1292             patterns, webpage, 'ytplayer.config', default=None)
1293         if config:
1294             return self._parse_json(
1295                 uppercase_escape(config), video_id, fatal=False)
1296
1297     def _get_automatic_captions(self, video_id, webpage):
1298         """We need the webpage for getting the captions url, pass it as an
1299            argument to speed up the process."""
1300         self.to_screen('%s: Looking for automatic captions' % video_id)
1301         player_config = self._get_ytplayer_config(video_id, webpage)
1302         err_msg = 'Couldn\'t find automatic captions for %s' % video_id
1303         if not player_config:
1304             self._downloader.report_warning(err_msg)
1305             return {}
1306         try:
1307             args = player_config['args']
1308             caption_url = args.get('ttsurl')
1309             if caption_url:
1310                 timestamp = args['timestamp']
1311                 # We get the available subtitles
1312                 list_params = compat_urllib_parse_urlencode({
1313                     'type': 'list',
1314                     'tlangs': 1,
1315                     'asrs': 1,
1316                 })
1317                 list_url = caption_url + '&' + list_params
1318                 caption_list = self._download_xml(list_url, video_id)
1319                 original_lang_node = caption_list.find('track')
1320                 if original_lang_node is None:
1321                     self._downloader.report_warning('Video doesn\'t have automatic captions')
1322                     return {}
1323                 original_lang = original_lang_node.attrib['lang_code']
1324                 caption_kind = original_lang_node.attrib.get('kind', '')
1325
1326                 sub_lang_list = {}
1327                 for lang_node in caption_list.findall('target'):
1328                     sub_lang = lang_node.attrib['lang_code']
1329                     sub_formats = []
1330                     for ext in self._SUBTITLE_FORMATS:
1331                         params = compat_urllib_parse_urlencode({
1332                             'lang': original_lang,
1333                             'tlang': sub_lang,
1334                             'fmt': ext,
1335                             'ts': timestamp,
1336                             'kind': caption_kind,
1337                         })
1338                         sub_formats.append({
1339                             'url': caption_url + '&' + params,
1340                             'ext': ext,
1341                         })
1342                     sub_lang_list[sub_lang] = sub_formats
1343                 return sub_lang_list
1344
1345             def make_captions(sub_url, sub_langs):
1346                 parsed_sub_url = compat_urllib_parse_urlparse(sub_url)
1347                 caption_qs = compat_parse_qs(parsed_sub_url.query)
1348                 captions = {}
1349                 for sub_lang in sub_langs:
1350                     sub_formats = []
1351                     for ext in self._SUBTITLE_FORMATS:
1352                         caption_qs.update({
1353                             'tlang': [sub_lang],
1354                             'fmt': [ext],
1355                         })
1356                         sub_url = compat_urlparse.urlunparse(parsed_sub_url._replace(
1357                             query=compat_urllib_parse_urlencode(caption_qs, True)))
1358                         sub_formats.append({
1359                             'url': sub_url,
1360                             'ext': ext,
1361                         })
1362                     captions[sub_lang] = sub_formats
1363                 return captions
1364
1365             # New captions format as of 22.06.2017
1366             player_response = args.get('player_response')
1367             if player_response and isinstance(player_response, compat_str):
1368                 player_response = self._parse_json(
1369                     player_response, video_id, fatal=False)
1370                 if player_response:
1371                     renderer = player_response['captions']['playerCaptionsTracklistRenderer']
1372                     base_url = renderer['captionTracks'][0]['baseUrl']
1373                     sub_lang_list = []
1374                     for lang in renderer['translationLanguages']:
1375                         lang_code = lang.get('languageCode')
1376                         if lang_code:
1377                             sub_lang_list.append(lang_code)
1378                     return make_captions(base_url, sub_lang_list)
1379
1380             # Some videos don't provide ttsurl but rather caption_tracks and
1381             # caption_translation_languages (e.g. 20LmZk1hakA)
1382             # Does not used anymore as of 22.06.2017
1383             caption_tracks = args['caption_tracks']
1384             caption_translation_languages = args['caption_translation_languages']
1385             caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
1386             sub_lang_list = []
1387             for lang in caption_translation_languages.split(','):
1388                 lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang))
1389                 sub_lang = lang_qs.get('lc', [None])[0]
1390                 if sub_lang:
1391                     sub_lang_list.append(sub_lang)
1392             return make_captions(caption_url, sub_lang_list)
1393         # An extractor error can be raise by the download process if there are
1394         # no automatic captions but there are subtitles
1395         except (KeyError, IndexError, ExtractorError):
1396             self._downloader.report_warning(err_msg)
1397             return {}
1398
1399     def _mark_watched(self, video_id, video_info, player_response):
1400         playback_url = url_or_none(try_get(
1401             player_response,
1402             lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']) or try_get(
1403             video_info, lambda x: x['videostats_playback_base_url'][0]))
1404         if not playback_url:
1405             return
1406         parsed_playback_url = compat_urlparse.urlparse(playback_url)
1407         qs = compat_urlparse.parse_qs(parsed_playback_url.query)
1408
1409         # cpn generation algorithm is reverse engineered from base.js.
1410         # In fact it works even with dummy cpn.
1411         CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
1412         cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
1413
1414         qs.update({
1415             'ver': ['2'],
1416             'cpn': [cpn],
1417         })
1418         playback_url = compat_urlparse.urlunparse(
1419             parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
1420
1421         self._download_webpage(
1422             playback_url, video_id, 'Marking watched',
1423             'Unable to mark watched', fatal=False)
1424
1425     @staticmethod
1426     def _extract_urls(webpage):
1427         # Embedded YouTube player
1428         entries = [
1429             unescapeHTML(mobj.group('url'))
1430             for mobj in re.finditer(r'''(?x)
1431             (?:
1432                 <iframe[^>]+?src=|
1433                 data-video-url=|
1434                 <embed[^>]+?src=|
1435                 embedSWF\(?:\s*|
1436                 <object[^>]+data=|
1437                 new\s+SWFObject\(
1438             )
1439             (["\'])
1440                 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1441                 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
1442             \1''', webpage)]
1443
1444         # lazyYT YouTube embed
1445         entries.extend(list(map(
1446             unescapeHTML,
1447             re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))
1448
1449         # Wordpress "YouTube Video Importer" plugin
1450         matches = re.findall(r'''(?x)<div[^>]+
1451             class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
1452             data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
1453         entries.extend(m[-1] for m in matches)
1454
1455         return entries
1456
1457     @staticmethod
1458     def _extract_url(webpage):
1459         urls = YoutubeIE._extract_urls(webpage)
1460         return urls[0] if urls else None
1461
1462     @classmethod
1463     def extract_id(cls, url):
1464         mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
1465         if mobj is None:
1466             raise ExtractorError('Invalid URL: %s' % url)
1467         video_id = mobj.group(2)
1468         return video_id
1469
1470     def _extract_annotations(self, video_id):
1471         url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id
1472         return self._download_webpage(url, video_id, note='Searching for annotations.', errnote='Unable to download video annotations.')
1473
1474     @staticmethod
1475     def _extract_chapters(description, duration):
1476         if not description:
1477             return None
1478         chapter_lines = re.findall(
1479             r'(?:^|<br\s*/>)([^<]*<a[^>]+onclick=["\']yt\.www\.watch\.player\.seekTo[^>]+>(\d{1,2}:\d{1,2}(?::\d{1,2})?)</a>[^>]*)(?=$|<br\s*/>)',
1480             description)
1481         if not chapter_lines:
1482             return None
1483         chapters = []
1484         for next_num, (chapter_line, time_point) in enumerate(
1485                 chapter_lines, start=1):
1486             start_time = parse_duration(time_point)
1487             if start_time is None:
1488                 continue
1489             if start_time > duration:
1490                 break
1491             end_time = (duration if next_num == len(chapter_lines)
1492                         else parse_duration(chapter_lines[next_num][1]))
1493             if end_time is None:
1494                 continue
1495             if end_time > duration:
1496                 end_time = duration
1497             if start_time > end_time:
1498                 break
1499             chapter_title = re.sub(
1500                 r'<a[^>]+>[^<]+</a>', '', chapter_line).strip(' \t-')
1501             chapter_title = re.sub(r'\s+', ' ', chapter_title)
1502             chapters.append({
1503                 'start_time': start_time,
1504                 'end_time': end_time,
1505                 'title': chapter_title,
1506             })
1507         return chapters
1508
1509     def _real_extract(self, url):
1510         url, smuggled_data = unsmuggle_url(url, {})
1511
1512         proto = (
1513             'http' if self._downloader.params.get('prefer_insecure', False)
1514             else 'https')
1515
1516         start_time = None
1517         end_time = None
1518         parsed_url = compat_urllib_parse_urlparse(url)
1519         for component in [parsed_url.fragment, parsed_url.query]:
1520             query = compat_parse_qs(component)
1521             if start_time is None and 't' in query:
1522                 start_time = parse_duration(query['t'][0])
1523             if start_time is None and 'start' in query:
1524                 start_time = parse_duration(query['start'][0])
1525             if end_time is None and 'end' in query:
1526                 end_time = parse_duration(query['end'][0])
1527
1528         # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1529         mobj = re.search(self._NEXT_URL_RE, url)
1530         if mobj:
1531             url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
1532         video_id = self.extract_id(url)
1533
1534         # Get video webpage
1535         url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
1536         video_webpage = self._download_webpage(url, video_id)
1537
1538         # Attempt to extract SWF player URL
1539         mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
1540         if mobj is not None:
1541             player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1542         else:
1543             player_url = None
1544
1545         dash_mpds = []
1546
1547         def add_dash_mpd(video_info):
1548             dash_mpd = video_info.get('dashmpd')
1549             if dash_mpd and dash_mpd[0] not in dash_mpds:
1550                 dash_mpds.append(dash_mpd[0])
1551
1552         def add_dash_mpd_pr(pl_response):
1553             dash_mpd = url_or_none(try_get(
1554                 pl_response, lambda x: x['streamingData']['dashManifestUrl'],
1555                 compat_str))
1556             if dash_mpd and dash_mpd not in dash_mpds:
1557                 dash_mpds.append(dash_mpd)
1558
1559         is_live = None
1560         view_count = None
1561
1562         def extract_view_count(v_info):
1563             return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
1564
1565         player_response = {}
1566
1567         # Get video info
1568         embed_webpage = None
1569         if re.search(r'player-age-gate-content">', video_webpage) is not None:
1570             age_gate = True
1571             # We simulate the access to the video from www.youtube.com/v/{video_id}
1572             # this can be viewed without login into Youtube
1573             url = proto + '://www.youtube.com/embed/%s' % video_id
1574             embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
1575             data = compat_urllib_parse_urlencode({
1576                 'video_id': video_id,
1577                 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1578                 'sts': self._search_regex(
1579                     r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
1580             })
1581             video_info_url = proto + '://www.youtube.com/get_video_info?' + data
1582             video_info_webpage = self._download_webpage(
1583                 video_info_url, video_id,
1584                 note='Refetching age-gated info webpage',
1585                 errnote='unable to download video info webpage')
1586             video_info = compat_parse_qs(video_info_webpage)
1587             add_dash_mpd(video_info)
1588         else:
1589             age_gate = False
1590             video_info = None
1591             sts = None
1592             # Try looking directly into the video webpage
1593             ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
1594             if ytplayer_config:
1595                 args = ytplayer_config['args']
1596                 if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
1597                     # Convert to the same format returned by compat_parse_qs
1598                     video_info = dict((k, [v]) for k, v in args.items())
1599                     add_dash_mpd(video_info)
1600                 # Rental video is not rented but preview is available (e.g.
1601                 # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
1602                 # https://github.com/ytdl-org/youtube-dl/issues/10532)
1603                 if not video_info and args.get('ypc_vid'):
1604                     return self.url_result(
1605                         args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
1606                 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1607                     is_live = True
1608                 sts = ytplayer_config.get('sts')
1609                 if not player_response:
1610                     pl_response = str_or_none(args.get('player_response'))
1611                     if pl_response:
1612                         pl_response = self._parse_json(pl_response, video_id, fatal=False)
1613                         if isinstance(pl_response, dict):
1614                             player_response = pl_response
1615             if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
1616                 add_dash_mpd_pr(player_response)
1617                 # We also try looking in get_video_info since it may contain different dashmpd
1618                 # URL that points to a DASH manifest with possibly different itag set (some itags
1619                 # are missing from DASH manifest pointed by webpage's dashmpd, some - from DASH
1620                 # manifest pointed by get_video_info's dashmpd).
1621                 # The general idea is to take a union of itags of both DASH manifests (for example
1622                 # video with such 'manifest behavior' see https://github.com/ytdl-org/youtube-dl/issues/6093)
1623                 self.report_video_info_webpage_download(video_id)
1624                 for el in ('info', 'embedded', 'detailpage', 'vevo', ''):
1625                     query = {
1626                         'video_id': video_id,
1627                         'ps': 'default',
1628                         'eurl': '',
1629                         'gl': 'US',
1630                         'hl': 'en',
1631                     }
1632                     if el:
1633                         query['el'] = el
1634                     if sts:
1635                         query['sts'] = sts
1636                     video_info_webpage = self._download_webpage(
1637                         '%s://www.youtube.com/get_video_info' % proto,
1638                         video_id, note=False,
1639                         errnote='unable to download video info webpage',
1640                         fatal=False, query=query)
1641                     if not video_info_webpage:
1642                         continue
1643                     get_video_info = compat_parse_qs(video_info_webpage)
1644                     if not player_response:
1645                         pl_response = get_video_info.get('player_response', [None])[0]
1646                         if isinstance(pl_response, dict):
1647                             player_response = pl_response
1648                             add_dash_mpd_pr(player_response)
1649                     add_dash_mpd(get_video_info)
1650                     if view_count is None:
1651                         view_count = extract_view_count(get_video_info)
1652                     if not video_info:
1653                         video_info = get_video_info
1654                     if 'token' in get_video_info:
1655                         # Different get_video_info requests may report different results, e.g.
1656                         # some may report video unavailability, but some may serve it without
1657                         # any complaint (see https://github.com/ytdl-org/youtube-dl/issues/7362,
1658                         # the original webpage as well as el=info and el=embedded get_video_info
1659                         # requests report video unavailability due to geo restriction while
1660                         # el=detailpage succeeds and returns valid data). This is probably
1661                         # due to YouTube measures against IP ranges of hosting providers.
1662                         # Working around by preferring the first succeeded video_info containing
1663                         # the token if no such video_info yet was found.
1664                         if 'token' not in video_info:
1665                             video_info = get_video_info
1666                         break
1667
1668         def extract_unavailable_message():
1669             return self._html_search_regex(
1670                 r'(?s)<h1[^>]+id="unavailable-message"[^>]*>(.+?)</h1>',
1671                 video_webpage, 'unavailable message', default=None)
1672
1673         if 'token' not in video_info:
1674             if 'reason' in video_info:
1675                 if 'The uploader has not made this video available in your country.' in video_info['reason']:
1676                     regions_allowed = self._html_search_meta(
1677                         'regionsAllowed', video_webpage, default=None)
1678                     countries = regions_allowed.split(',') if regions_allowed else None
1679                     self.raise_geo_restricted(
1680                         msg=video_info['reason'][0], countries=countries)
1681                 reason = video_info['reason'][0]
1682                 if 'Invalid parameters' in reason:
1683                     unavailable_message = extract_unavailable_message()
1684                     if unavailable_message:
1685                         reason = unavailable_message
1686                 raise ExtractorError(
1687                     'YouTube said: %s' % reason,
1688                     expected=True, video_id=video_id)
1689             else:
1690                 raise ExtractorError(
1691                     '"token" parameter not in video info for unknown reason',
1692                     video_id=video_id)
1693
1694         if video_info.get('license_info'):
1695             raise ExtractorError('This video is DRM protected.', expected=True)
1696
1697         video_details = try_get(
1698             player_response, lambda x: x['videoDetails'], dict) or {}
1699
1700         # title
1701         if 'title' in video_info:
1702             video_title = video_info['title'][0]
1703         elif 'title' in player_response:
1704             video_title = video_details['title']
1705         else:
1706             self._downloader.report_warning('Unable to extract video title')
1707             video_title = '_'
1708
1709         # description
1710         description_original = video_description = get_element_by_id("eow-description", video_webpage)
1711         if video_description:
1712
1713             def replace_url(m):
1714                 redir_url = compat_urlparse.urljoin(url, m.group(1))
1715                 parsed_redir_url = compat_urllib_parse_urlparse(redir_url)
1716                 if re.search(r'^(?:www\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)$', parsed_redir_url.netloc) and parsed_redir_url.path == '/redirect':
1717                     qs = compat_parse_qs(parsed_redir_url.query)
1718                     q = qs.get('q')
1719                     if q and q[0]:
1720                         return q[0]
1721                 return redir_url
1722
1723             description_original = video_description = re.sub(r'''(?x)
1724                 <a\s+
1725                     (?:[a-zA-Z-]+="[^"]*"\s+)*?
1726                     (?:title|href)="([^"]+)"\s+
1727                     (?:[a-zA-Z-]+="[^"]*"\s+)*?
1728                     class="[^"]*"[^>]*>
1729                 [^<]+\.{3}\s*
1730                 </a>
1731             ''', replace_url, video_description)
1732             video_description = clean_html(video_description)
1733         else:
1734             fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
1735             if fd_mobj:
1736                 video_description = unescapeHTML(fd_mobj.group(1))
1737             else:
1738                 video_description = ''
1739
1740         if not smuggled_data.get('force_singlefeed', False):
1741             if not self._downloader.params.get('noplaylist'):
1742                 multifeed_metadata_list = try_get(
1743                     player_response,
1744                     lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
1745                     compat_str) or try_get(
1746                     video_info, lambda x: x['multifeed_metadata_list'][0], compat_str)
1747                 if multifeed_metadata_list:
1748                     entries = []
1749                     feed_ids = []
1750                     for feed in multifeed_metadata_list.split(','):
1751                         # Unquote should take place before split on comma (,) since textual
1752                         # fields may contain comma as well (see
1753                         # https://github.com/ytdl-org/youtube-dl/issues/8536)
1754                         feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
1755                         entries.append({
1756                             '_type': 'url_transparent',
1757                             'ie_key': 'Youtube',
1758                             'url': smuggle_url(
1759                                 '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
1760                                 {'force_singlefeed': True}),
1761                             'title': '%s (%s)' % (video_title, feed_data['title'][0]),
1762                         })
1763                         feed_ids.append(feed_data['id'][0])
1764                     self.to_screen(
1765                         'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
1766                         % (', '.join(feed_ids), video_id))
1767                     return self.playlist_result(entries, video_id, video_title, video_description)
1768             else:
1769                 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
1770
1771         if view_count is None:
1772             view_count = extract_view_count(video_info)
1773         if view_count is None and video_details:
1774             view_count = int_or_none(video_details.get('viewCount'))
1775
1776         # Check for "rental" videos
1777         if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
1778             raise ExtractorError('"rental" videos not supported. See https://github.com/ytdl-org/youtube-dl/issues/359 for more information.', expected=True)
1779
1780         def _extract_filesize(media_url):
1781             return int_or_none(self._search_regex(
1782                 r'\bclen[=/](\d+)', media_url, 'filesize', default=None))
1783
1784         if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1785             self.report_rtmp_download()
1786             formats = [{
1787                 'format_id': '_rtmp',
1788                 'protocol': 'rtmp',
1789                 'url': video_info['conn'][0],
1790                 'player_url': player_url,
1791             }]
1792         elif not is_live and (len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1):
1793             encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
1794             if 'rtmpe%3Dyes' in encoded_url_map:
1795                 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/ytdl-org/youtube-dl/issues/343 for more information.', expected=True)
1796             formats_spec = {}
1797             fmt_list = video_info.get('fmt_list', [''])[0]
1798             if fmt_list:
1799                 for fmt in fmt_list.split(','):
1800                     spec = fmt.split('/')
1801                     if len(spec) > 1:
1802                         width_height = spec[1].split('x')
1803                         if len(width_height) == 2:
1804                             formats_spec[spec[0]] = {
1805                                 'resolution': spec[1],
1806                                 'width': int_or_none(width_height[0]),
1807                                 'height': int_or_none(width_height[1]),
1808                             }
1809             q = qualities(['small', 'medium', 'hd720'])
1810             streaming_formats = try_get(player_response, lambda x: x['streamingData']['formats'], list)
1811             if streaming_formats:
1812                 for fmt in streaming_formats:
1813                     itag = str_or_none(fmt.get('itag'))
1814                     if not itag:
1815                         continue
1816                     quality = fmt.get('quality')
1817                     quality_label = fmt.get('qualityLabel') or quality
1818                     formats_spec[itag] = {
1819                         'asr': int_or_none(fmt.get('audioSampleRate')),
1820                         'filesize': int_or_none(fmt.get('contentLength')),
1821                         'format_note': quality_label,
1822                         'fps': int_or_none(fmt.get('fps')),
1823                         'height': int_or_none(fmt.get('height')),
1824                         'quality': q(quality),
1825                         # bitrate for itag 43 is always 2147483647
1826                         'tbr': float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000) if itag != '43' else None,
1827                         'width': int_or_none(fmt.get('width')),
1828                     }
1829             formats = []
1830             for url_data_str in encoded_url_map.split(','):
1831                 url_data = compat_parse_qs(url_data_str)
1832                 if 'itag' not in url_data or 'url' not in url_data:
1833                     continue
1834                 stream_type = int_or_none(try_get(url_data, lambda x: x['stream_type'][0]))
1835                 # Unsupported FORMAT_STREAM_TYPE_OTF
1836                 if stream_type == 3:
1837                     continue
1838                 format_id = url_data['itag'][0]
1839                 url = url_data['url'][0]
1840
1841                 if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
1842                     ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
1843                     jsplayer_url_json = self._search_regex(
1844                         ASSETS_RE,
1845                         embed_webpage if age_gate else video_webpage,
1846                         'JS player URL (1)', default=None)
1847                     if not jsplayer_url_json and not age_gate:
1848                         # We need the embed website after all
1849                         if embed_webpage is None:
1850                             embed_url = proto + '://www.youtube.com/embed/%s' % video_id
1851                             embed_webpage = self._download_webpage(
1852                                 embed_url, video_id, 'Downloading embed webpage')
1853                         jsplayer_url_json = self._search_regex(
1854                             ASSETS_RE, embed_webpage, 'JS player URL')
1855
1856                     player_url = json.loads(jsplayer_url_json)
1857                     if player_url is None:
1858                         player_url_json = self._search_regex(
1859                             r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
1860                             video_webpage, 'age gate player URL')
1861                         player_url = json.loads(player_url_json)
1862
1863                 if 'sig' in url_data:
1864                     url += '&signature=' + url_data['sig'][0]
1865                 elif 's' in url_data:
1866                     encrypted_sig = url_data['s'][0]
1867
1868                     if self._downloader.params.get('verbose'):
1869                         if player_url is None:
1870                             player_version = 'unknown'
1871                             player_desc = 'unknown'
1872                         else:
1873                             if player_url.endswith('swf'):
1874                                 player_version = self._search_regex(
1875                                     r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
1876                                     'flash player', fatal=False)
1877                                 player_desc = 'flash player %s' % player_version
1878                             else:
1879                                 player_version = self._search_regex(
1880                                     [r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js',
1881                                      r'(?:www|player(?:_ias)?)-([^/]+)(?:/[a-z]{2,3}_[A-Z]{2})?/base\.js'],
1882                                     player_url,
1883                                     'html5 player', fatal=False)
1884                                 player_desc = 'html5 player %s' % player_version
1885
1886                         parts_sizes = self._signature_cache_id(encrypted_sig)
1887                         self.to_screen('{%s} signature length %s, %s' %
1888                                        (format_id, parts_sizes, player_desc))
1889
1890                     signature = self._decrypt_signature(
1891                         encrypted_sig, video_id, player_url, age_gate)
1892                     url += '&signature=' + signature
1893                 if 'ratebypass' not in url:
1894                     url += '&ratebypass=yes'
1895
1896                 dct = {
1897                     'format_id': format_id,
1898                     'url': url,
1899                     'player_url': player_url,
1900                 }
1901                 if format_id in self._formats:
1902                     dct.update(self._formats[format_id])
1903                 if format_id in formats_spec:
1904                     dct.update(formats_spec[format_id])
1905
1906                 # Some itags are not included in DASH manifest thus corresponding formats will
1907                 # lack metadata (see https://github.com/ytdl-org/youtube-dl/pull/5993).
1908                 # Trying to extract metadata from url_encoded_fmt_stream_map entry.
1909                 mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
1910                 width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
1911
1912                 filesize = int_or_none(url_data.get(
1913                     'clen', [None])[0]) or _extract_filesize(url)
1914
1915                 quality = url_data.get('quality', [None])[0]
1916
1917                 more_fields = {
1918                     'filesize': filesize,
1919                     'tbr': float_or_none(url_data.get('bitrate', [None])[0], 1000),
1920                     'width': width,
1921                     'height': height,
1922                     'fps': int_or_none(url_data.get('fps', [None])[0]),
1923                     'format_note': url_data.get('quality_label', [None])[0] or quality,
1924                     'quality': q(quality),
1925                 }
1926                 for key, value in more_fields.items():
1927                     if value:
1928                         dct[key] = value
1929                 type_ = url_data.get('type', [None])[0]
1930                 if type_:
1931                     type_split = type_.split(';')
1932                     kind_ext = type_split[0].split('/')
1933                     if len(kind_ext) == 2:
1934                         kind, _ = kind_ext
1935                         dct['ext'] = mimetype2ext(type_split[0])
1936                         if kind in ('audio', 'video'):
1937                             codecs = None
1938                             for mobj in re.finditer(
1939                                     r'(?P<key>[a-zA-Z_-]+)=(?P<quote>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):
1940                                 if mobj.group('key') == 'codecs':
1941                                     codecs = mobj.group('val')
1942                                     break
1943                             if codecs:
1944                                 dct.update(parse_codecs(codecs))
1945                 if dct.get('acodec') == 'none' or dct.get('vcodec') == 'none':
1946                     dct['downloader_options'] = {
1947                         # Youtube throttles chunks >~10M
1948                         'http_chunk_size': 10485760,
1949                     }
1950                 formats.append(dct)
1951         else:
1952             manifest_url = (
1953                 url_or_none(try_get(
1954                     player_response,
1955                     lambda x: x['streamingData']['hlsManifestUrl'],
1956                     compat_str)) or
1957                 url_or_none(try_get(
1958                     video_info, lambda x: x['hlsvp'][0], compat_str)))
1959             if manifest_url:
1960                 formats = []
1961                 m3u8_formats = self._extract_m3u8_formats(
1962                     manifest_url, video_id, 'mp4', fatal=False)
1963                 for a_format in m3u8_formats:
1964                     itag = self._search_regex(
1965                         r'/itag/(\d+)/', a_format['url'], 'itag', default=None)
1966                     if itag:
1967                         a_format['format_id'] = itag
1968                         if itag in self._formats:
1969                             dct = self._formats[itag].copy()
1970                             dct.update(a_format)
1971                             a_format = dct
1972                     a_format['player_url'] = player_url
1973                     # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
1974                     a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
1975                     formats.append(a_format)
1976             else:
1977                 error_message = clean_html(video_info.get('reason', [None])[0])
1978                 if not error_message:
1979                     error_message = extract_unavailable_message()
1980                 if error_message:
1981                     raise ExtractorError(error_message, expected=True)
1982                 raise ExtractorError('no conn, hlsvp, hlsManifestUrl or url_encoded_fmt_stream_map information found in video info')
1983
1984         # uploader
1985         video_uploader = try_get(
1986             video_info, lambda x: x['author'][0],
1987             compat_str) or str_or_none(video_details.get('author'))
1988         if video_uploader:
1989             video_uploader = compat_urllib_parse_unquote_plus(video_uploader)
1990         else:
1991             self._downloader.report_warning('unable to extract uploader name')
1992
1993         # uploader_id
1994         video_uploader_id = None
1995         video_uploader_url = None
1996         mobj = re.search(
1997             r'<link itemprop="url" href="(?P<uploader_url>https?://www\.youtube\.com/(?:user|channel)/(?P<uploader_id>[^"]+))">',
1998             video_webpage)
1999         if mobj is not None:
2000             video_uploader_id = mobj.group('uploader_id')
2001             video_uploader_url = mobj.group('uploader_url')
2002         else:
2003             self._downloader.report_warning('unable to extract uploader nickname')
2004
2005         channel_id = self._html_search_meta(
2006             'channelId', video_webpage, 'channel id')
2007         channel_url = 'http://www.youtube.com/channel/%s' % channel_id if channel_id else None
2008
2009         # thumbnail image
2010         # We try first to get a high quality image:
2011         m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
2012                             video_webpage, re.DOTALL)
2013         if m_thumb is not None:
2014             video_thumbnail = m_thumb.group(1)
2015         elif 'thumbnail_url' not in video_info:
2016             self._downloader.report_warning('unable to extract video thumbnail')
2017             video_thumbnail = None
2018         else:   # don't panic if we can't find it
2019             video_thumbnail = compat_urllib_parse_unquote_plus(video_info['thumbnail_url'][0])
2020
2021         # upload date
2022         upload_date = self._html_search_meta(
2023             'datePublished', video_webpage, 'upload date', default=None)
2024         if not upload_date:
2025             upload_date = self._search_regex(
2026                 [r'(?s)id="eow-date.*?>(.*?)</span>',
2027                  r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'],
2028                 video_webpage, 'upload date', default=None)
2029         upload_date = unified_strdate(upload_date)
2030
2031         video_license = self._html_search_regex(
2032             r'<h4[^>]+class="title"[^>]*>\s*License\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li',
2033             video_webpage, 'license', default=None)
2034
2035         m_music = re.search(
2036             r'''(?x)
2037                 <h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*
2038                 <ul[^>]*>\s*
2039                 <li>(?P<title>.+?)
2040                 by (?P<creator>.+?)
2041                 (?:
2042                     \(.+?\)|
2043                     <a[^>]*
2044                         (?:
2045                             \bhref=["\']/red[^>]*>|             # drop possible
2046                             >\s*Listen ad-free with YouTube Red # YouTube Red ad
2047                         )
2048                     .*?
2049                 )?</li
2050             ''',
2051             video_webpage)
2052         if m_music:
2053             video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
2054             video_creator = clean_html(m_music.group('creator'))
2055         else:
2056             video_alt_title = video_creator = None
2057
2058         def extract_meta(field):
2059             return self._html_search_regex(
2060                 r'<h4[^>]+class="title"[^>]*>\s*%s\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li>\s*' % field,
2061                 video_webpage, field, default=None)
2062
2063         track = extract_meta('Song')
2064         artist = extract_meta('Artist')
2065
2066         m_episode = re.search(
2067             r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
2068             video_webpage)
2069         if m_episode:
2070             series = unescapeHTML(m_episode.group('series'))
2071             season_number = int(m_episode.group('season'))
2072             episode_number = int(m_episode.group('episode'))
2073         else:
2074             series = season_number = episode_number = None
2075
2076         m_cat_container = self._search_regex(
2077             r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
2078             video_webpage, 'categories', default=None)
2079         if m_cat_container:
2080             category = self._html_search_regex(
2081                 r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
2082                 default=None)
2083             video_categories = None if category is None else [category]
2084         else:
2085             video_categories = None
2086
2087         video_tags = [
2088             unescapeHTML(m.group('content'))
2089             for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
2090
2091         def _extract_count(count_name):
2092             return str_to_int(self._search_regex(
2093                 r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>'
2094                 % re.escape(count_name),
2095                 video_webpage, count_name, default=None))
2096
2097         like_count = _extract_count('like')
2098         dislike_count = _extract_count('dislike')
2099
2100         if view_count is None:
2101             view_count = str_to_int(self._search_regex(
2102                 r'<[^>]+class=["\']watch-view-count[^>]+>\s*([\d,\s]+)', video_webpage,
2103                 'view count', default=None))
2104
2105         # subtitles
2106         video_subtitles = self.extract_subtitles(video_id, video_webpage)
2107         automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
2108
2109         video_duration = try_get(
2110             video_info, lambda x: int_or_none(x['length_seconds'][0]))
2111         if not video_duration:
2112             video_duration = int_or_none(video_details.get('lengthSeconds'))
2113         if not video_duration:
2114             video_duration = parse_duration(self._html_search_meta(
2115                 'duration', video_webpage, 'video duration'))
2116
2117         # annotations
2118         video_annotations = None
2119         if self._downloader.params.get('writeannotations', False):
2120             video_annotations = self._extract_annotations(video_id)
2121
2122         chapters = self._extract_chapters(description_original, video_duration)
2123
2124         # Look for the DASH manifest
2125         if self._downloader.params.get('youtube_include_dash_manifest', True):
2126             dash_mpd_fatal = True
2127             for mpd_url in dash_mpds:
2128                 dash_formats = {}
2129                 try:
2130                     def decrypt_sig(mobj):
2131                         s = mobj.group(1)
2132                         dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
2133                         return '/signature/%s' % dec_s
2134
2135                     mpd_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, mpd_url)
2136
2137                     for df in self._extract_mpd_formats(
2138                             mpd_url, video_id, fatal=dash_mpd_fatal,
2139                             formats_dict=self._formats):
2140                         if not df.get('filesize'):
2141                             df['filesize'] = _extract_filesize(df['url'])
2142                         # Do not overwrite DASH format found in some previous DASH manifest
2143                         if df['format_id'] not in dash_formats:
2144                             dash_formats[df['format_id']] = df
2145                         # Additional DASH manifests may end up in HTTP Error 403 therefore
2146                         # allow them to fail without bug report message if we already have
2147                         # some DASH manifest succeeded. This is temporary workaround to reduce
2148                         # burst of bug reports until we figure out the reason and whether it
2149                         # can be fixed at all.
2150                         dash_mpd_fatal = False
2151                 except (ExtractorError, KeyError) as e:
2152                     self.report_warning(
2153                         'Skipping DASH manifest: %r' % e, video_id)
2154                 if dash_formats:
2155                     # Remove the formats we found through non-DASH, they
2156                     # contain less info and it can be wrong, because we use
2157                     # fixed values (for example the resolution). See
2158                     # https://github.com/ytdl-org/youtube-dl/issues/5774 for an
2159                     # example.
2160                     formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
2161                     formats.extend(dash_formats.values())
2162
2163         # Check for malformed aspect ratio
2164         stretched_m = re.search(
2165             r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
2166             video_webpage)
2167         if stretched_m:
2168             w = float(stretched_m.group('w'))
2169             h = float(stretched_m.group('h'))
2170             # yt:stretch may hold invalid ratio data (e.g. for Q39EVAstoRM ratio is 17:0).
2171             # We will only process correct ratios.
2172             if w > 0 and h > 0:
2173                 ratio = w / h
2174                 for f in formats:
2175                     if f.get('vcodec') != 'none':
2176                         f['stretched_ratio'] = ratio
2177
2178         self._sort_formats(formats)
2179
2180         self.mark_watched(video_id, video_info, player_response)
2181
2182         return {
2183             'id': video_id,
2184             'uploader': video_uploader,
2185             'uploader_id': video_uploader_id,
2186             'uploader_url': video_uploader_url,
2187             'channel_id': channel_id,
2188             'channel_url': channel_url,
2189             'upload_date': upload_date,
2190             'license': video_license,
2191             'creator': video_creator or artist,
2192             'title': video_title,
2193             'alt_title': video_alt_title or track,
2194             'thumbnail': video_thumbnail,
2195             'description': video_description,
2196             'categories': video_categories,
2197             'tags': video_tags,
2198             'subtitles': video_subtitles,
2199             'automatic_captions': automatic_captions,
2200             'duration': video_duration,
2201             'age_limit': 18 if age_gate else 0,
2202             'annotations': video_annotations,
2203             'chapters': chapters,
2204             'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
2205             'view_count': view_count,
2206             'like_count': like_count,
2207             'dislike_count': dislike_count,
2208             'average_rating': float_or_none(video_info.get('avg_rating', [None])[0]),
2209             'formats': formats,
2210             'is_live': is_live,
2211             'start_time': start_time,
2212             'end_time': end_time,
2213             'series': series,
2214             'season_number': season_number,
2215             'episode_number': episode_number,
2216             'track': track,
2217             'artist': artist,
2218         }
2219
2220
2221 class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
2222     IE_DESC = 'YouTube.com playlists'
2223     _VALID_URL = r"""(?x)(?:
2224                         (?:https?://)?
2225                         (?:\w+\.)?
2226                         (?:
2227                             (?:
2228                                 youtube\.com|
2229                                 invidio\.us
2230                             )
2231                             /
2232                             (?:
2233                                (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/(?:videoseries|[0-9A-Za-z_-]{11}))
2234                                \? (?:.*?[&;])*? (?:p|a|list)=
2235                             |  p/
2236                             )|
2237                             youtu\.be/[0-9A-Za-z_-]{11}\?.*?\blist=
2238                         )
2239                         (
2240                             (?:PL|LL|EC|UU|FL|RD|UL|TL|OLAK5uy_)?[0-9A-Za-z-_]{10,}
2241                             # Top tracks, they can also include dots
2242                             |(?:MC)[\w\.]*
2243                         )
2244                         .*
2245                      |
2246                         (%(playlist_id)s)
2247                      )""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
2248     _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
2249     _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&amp;[^"]*?index=(?P<index>\d+)(?:[^>]+>(?P<title>[^<]+))?'
2250     IE_NAME = 'youtube:playlist'
2251     _TESTS = [{
2252         'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
2253         'info_dict': {
2254             'title': 'ytdl test PL',
2255             'id': 'PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
2256         },
2257         'playlist_count': 3,
2258     }, {
2259         'url': 'https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
2260         'info_dict': {
2261             'id': 'PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
2262             'title': 'YDL_Empty_List',
2263         },
2264         'playlist_count': 0,
2265         'skip': 'This playlist is private',
2266     }, {
2267         'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
2268         'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2269         'info_dict': {
2270             'title': '29C3: Not my department',
2271             'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2272         },
2273         'playlist_count': 95,
2274     }, {
2275         'note': 'issue #673',
2276         'url': 'PLBB231211A4F62143',
2277         'info_dict': {
2278             'title': '[OLD]Team Fortress 2 (Class-based LP)',
2279             'id': 'PLBB231211A4F62143',
2280         },
2281         'playlist_mincount': 26,
2282     }, {
2283         'note': 'Large playlist',
2284         'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
2285         'info_dict': {
2286             'title': 'Uploads from Cauchemar',
2287             'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
2288         },
2289         'playlist_mincount': 799,
2290     }, {
2291         'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
2292         'info_dict': {
2293             'title': 'YDL_safe_search',
2294             'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
2295         },
2296         'playlist_count': 2,
2297         'skip': 'This playlist is private',
2298     }, {
2299         'note': 'embedded',
2300         'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
2301         'playlist_count': 4,
2302         'info_dict': {
2303             'title': 'JODA15',
2304             'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
2305         }
2306     }, {
2307         'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
2308         'playlist_mincount': 485,
2309         'info_dict': {
2310             'title': '2017 華語最新單曲 (2/24更新)',
2311             'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
2312         }
2313     }, {
2314         'note': 'Embedded SWF player',
2315         'url': 'https://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
2316         'playlist_count': 4,
2317         'info_dict': {
2318             'title': 'JODA7',
2319             'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ',
2320         }
2321     }, {
2322         'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
2323         'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
2324         'info_dict': {
2325             'title': 'Uploads from Interstellar Movie',
2326             'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
2327         },
2328         'playlist_mincount': 21,
2329     }, {
2330         # Playlist URL that does not actually serve a playlist
2331         'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
2332         'info_dict': {
2333             'id': 'FqZTN594JQw',
2334             'ext': 'webm',
2335             'title': "Smiley's People 01 detective, Adventure Series, Action",
2336             'uploader': 'STREEM',
2337             'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
2338             'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
2339             'upload_date': '20150526',
2340             'license': 'Standard YouTube License',
2341             'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
2342             'categories': ['People & Blogs'],
2343             'tags': list,
2344             'view_count': int,
2345             'like_count': int,
2346             'dislike_count': int,
2347         },
2348         'params': {
2349             'skip_download': True,
2350         },
2351         'add_ie': [YoutubeIE.ie_key()],
2352     }, {
2353         'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
2354         'info_dict': {
2355             'id': 'yeWKywCrFtk',
2356             'ext': 'mp4',
2357             'title': 'Small Scale Baler and Braiding Rugs',
2358             'uploader': 'Backus-Page House Museum',
2359             'uploader_id': 'backuspagemuseum',
2360             'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
2361             'upload_date': '20161008',
2362             'license': 'Standard YouTube License',
2363             'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
2364             'categories': ['Nonprofits & Activism'],
2365             'tags': list,
2366             'like_count': int,
2367             'dislike_count': int,
2368         },
2369         'params': {
2370             'noplaylist': True,
2371             'skip_download': True,
2372         },
2373     }, {
2374         'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
2375         'only_matching': True,
2376     }, {
2377         'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
2378         'only_matching': True,
2379     }, {
2380         # music album playlist
2381         'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
2382         'only_matching': True,
2383     }, {
2384         'url': 'https://invidio.us/playlist?list=PLDIoUOhQQPlXr63I_vwF9GD8sAKh77dWU',
2385         'only_matching': True,
2386     }]
2387
2388     def _real_initialize(self):
2389         self._login()
2390
2391     def _extract_mix(self, playlist_id):
2392         # The mixes are generated from a single video
2393         # the id of the playlist is just 'RD' + video_id
2394         ids = []
2395         last_id = playlist_id[-11:]
2396         for n in itertools.count(1):
2397             url = 'https://youtube.com/watch?v=%s&list=%s' % (last_id, playlist_id)
2398             webpage = self._download_webpage(
2399                 url, playlist_id, 'Downloading page {0} of Youtube mix'.format(n))
2400             new_ids = orderedSet(re.findall(
2401                 r'''(?xs)data-video-username=".*?".*?
2402                            href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id),
2403                 webpage))
2404             # Fetch new pages until all the videos are repeated, it seems that
2405             # there are always 51 unique videos.
2406             new_ids = [_id for _id in new_ids if _id not in ids]
2407             if not new_ids:
2408                 break
2409             ids.extend(new_ids)
2410             last_id = ids[-1]
2411
2412         url_results = self._ids_to_results(ids)
2413
2414         search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage)
2415         title_span = (
2416             search_title('playlist-title') or
2417             search_title('title long-title') or
2418             search_title('title'))
2419         title = clean_html(title_span)
2420
2421         return self.playlist_result(url_results, playlist_id, title)
2422
2423     def _extract_playlist(self, playlist_id):
2424         url = self._TEMPLATE_URL % playlist_id
2425         page = self._download_webpage(url, playlist_id)
2426
2427         # the yt-alert-message now has tabindex attribute (see https://github.com/ytdl-org/youtube-dl/issues/11604)
2428         for match in re.findall(r'<div class="yt-alert-message"[^>]*>([^<]+)</div>', page):
2429             match = match.strip()
2430             # Check if the playlist exists or is private
2431             mobj = re.match(r'[^<]*(?:The|This) playlist (?P<reason>does not exist|is private)[^<]*', match)
2432             if mobj:
2433                 reason = mobj.group('reason')
2434                 message = 'This playlist %s' % reason
2435                 if 'private' in reason:
2436                     message += ', use --username or --netrc to access it'
2437                 message += '.'
2438                 raise ExtractorError(message, expected=True)
2439             elif re.match(r'[^<]*Invalid parameters[^<]*', match):
2440                 raise ExtractorError(
2441                     'Invalid parameters. Maybe URL is incorrect.',
2442                     expected=True)
2443             elif re.match(r'[^<]*Choose your language[^<]*', match):
2444                 continue
2445             else:
2446                 self.report_warning('Youtube gives an alert message: ' + match)
2447
2448         playlist_title = self._html_search_regex(
2449             r'(?s)<h1 class="pl-header-title[^"]*"[^>]*>\s*(.*?)\s*</h1>',
2450             page, 'title', default=None)
2451
2452         _UPLOADER_BASE = r'class=["\']pl-header-details[^>]+>\s*<li>\s*<a[^>]+\bhref='
2453         uploader = self._search_regex(
2454             r'%s["\']/(?:user|channel)/[^>]+>([^<]+)' % _UPLOADER_BASE,
2455             page, 'uploader', default=None)
2456         mobj = re.search(
2457             r'%s(["\'])(?P<path>/(?:user|channel)/(?P<uploader_id>.+?))\1' % _UPLOADER_BASE,
2458             page)
2459         if mobj:
2460             uploader_id = mobj.group('uploader_id')
2461             uploader_url = compat_urlparse.urljoin(url, mobj.group('path'))
2462         else:
2463             uploader_id = uploader_url = None
2464
2465         has_videos = True
2466
2467         if not playlist_title:
2468             try:
2469                 # Some playlist URLs don't actually serve a playlist (e.g.
2470                 # https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4)
2471                 next(self._entries(page, playlist_id))
2472             except StopIteration:
2473                 has_videos = False
2474
2475         playlist = self.playlist_result(
2476             self._entries(page, playlist_id), playlist_id, playlist_title)
2477         playlist.update({
2478             'uploader': uploader,
2479             'uploader_id': uploader_id,
2480             'uploader_url': uploader_url,
2481         })
2482
2483         return has_videos, playlist
2484
2485     def _check_download_just_video(self, url, playlist_id):
2486         # Check if it's a video-specific URL
2487         query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
2488         video_id = query_dict.get('v', [None])[0] or self._search_regex(
2489             r'(?:(?:^|//)youtu\.be/|youtube\.com/embed/(?!videoseries))([0-9A-Za-z_-]{11})', url,
2490             'video id', default=None)
2491         if video_id:
2492             if self._downloader.params.get('noplaylist'):
2493                 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
2494                 return video_id, self.url_result(video_id, 'Youtube', video_id=video_id)
2495             else:
2496                 self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
2497                 return video_id, None
2498         return None, None
2499
2500     def _real_extract(self, url):
2501         # Extract playlist id
2502         mobj = re.match(self._VALID_URL, url)
2503         if mobj is None:
2504             raise ExtractorError('Invalid URL: %s' % url)
2505         playlist_id = mobj.group(1) or mobj.group(2)
2506
2507         video_id, video = self._check_download_just_video(url, playlist_id)
2508         if video:
2509             return video
2510
2511         if playlist_id.startswith(('RD', 'UL', 'PU')):
2512             # Mixes require a custom extraction process
2513             return self._extract_mix(playlist_id)
2514
2515         has_videos, playlist = self._extract_playlist(playlist_id)
2516         if has_videos or not video_id:
2517             return playlist
2518
2519         # Some playlist URLs don't actually serve a playlist (see
2520         # https://github.com/ytdl-org/youtube-dl/issues/10537).
2521         # Fallback to plain video extraction if there is a video id
2522         # along with playlist id.
2523         return self.url_result(video_id, 'Youtube', video_id=video_id)
2524
2525
class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
    # Extractor for /channel/<id> URLs. Whenever the channel id can be read
    # from the channel page, extraction is delegated to the matching "UU..."
    # uploads playlist; otherwise the channel's videos page is scraped.
    IE_NAME = 'youtube:channel'
    IE_DESC = 'YouTube.com channels'
    _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com|(?:www\.)?invidio\.us)/channel/(?P<id>[0-9A-Za-z_-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos'
    _VIDEO_RE = r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?'
    _TESTS = [{
        'note': 'paginated channel',
        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
        'playlist_mincount': 91,
        'info_dict': {
            'id': 'UUKfVa3S1e4PHvxWcwyMMg8w',
            'title': 'Uploads from lex will',
        }
    }, {
        'note': 'Age restricted channel',
        # from https://www.youtube.com/user/DeusExOfficial
        'url': 'https://www.youtube.com/channel/UCs0ifCMCm1icqRbqhUINa0w',
        'playlist_mincount': 64,
        'info_dict': {
            'id': 'UUs0ifCMCm1icqRbqhUINa0w',
            'title': 'Uploads from Deus Ex',
        },
    }, {
        'url': 'https://invidio.us/channel/UC23qupoDRn9YOAVzeoxjOQA',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Reject URLs that the playlists or live extractors already claim."""
        if YoutubePlaylistsIE.suitable(url) or YoutubeLiveIE.suitable(url):
            return False
        return super(YoutubeChannelIE, cls).suitable(url)

    def _build_template_url(self, url, channel_id):
        """Return the URL of the videos page for channel_id."""
        return self._TEMPLATE_URL % channel_id

    def _real_extract(self, url):
        """Return the channel's uploads, preferably via its uploads playlist.

        Raises ExtractorError when the videos page yields no entries and
        carries an alert message.
        """
        channel_id = self._match_id(url)
        url = self._build_template_url(url, channel_id)

        # Channel by page listing is restricted to 35 pages of 30 items, i.e. 1050 videos total (see #5778)
        # Workaround by extracting as a playlist if managed to obtain channel playlist URL
        # otherwise fallback on channel by page extraction
        listing_page = self._download_webpage(
            url + '?view=57', channel_id,
            'Downloading channel page', fatal=False)
        channel_playlist_id = False
        if listing_page is not False:
            channel_playlist_id = self._html_search_meta(
                'channelId', listing_page, 'channel id', default=None)
            if not channel_playlist_id:
                # No channelId meta tag: try the app deep-link meta tags
                channel_url = self._html_search_meta(
                    ('al:ios:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad'),
                    listing_page, 'channel url', default=None)
                if channel_url:
                    channel_playlist_id = self._search_regex(
                        r'vnd\.youtube://user/([0-9A-Za-z_-]+)',
                        channel_url, 'channel id', default=None)

        if channel_playlist_id and channel_playlist_id.startswith('UC'):
            # Swap the "UC" prefix for "UU" to build the uploads playlist id
            playlist_id = 'UU' + channel_playlist_id[2:]
            playlist_url = compat_urlparse.urljoin(url, '/playlist?list=%s' % playlist_id)
            return self.url_result(playlist_url, 'YoutubePlaylist')

        channel_page = self._download_webpage(url, channel_id, 'Downloading page #1')
        autogenerated = re.search(r'''(?x)
                class="[^"]*?(?:
                    channel-header-autogenerated-label|
                    yt-channel-title-autogenerated
                )[^"]*"''', channel_page) is not None

        if autogenerated:
            # The videos are contained in a single page
            # the ajax pages can't be used, they are empty
            entries = []
            for video_id, video_title in self.extract_videos_from_page(channel_page):
                entries.append(self.url_result(
                    video_id, 'Youtube', video_id=video_id,
                    video_title=video_title))
            return self.playlist_result(entries, channel_id)

        # Probe for a first entry; when there is none, surface any alert
        # message found on the page as the error
        try:
            next(self._entries(channel_page, channel_id))
        except StopIteration:
            alert_message = self._html_search_regex(
                r'(?s)<div[^>]+class=(["\']).*?\byt-alert-message\b.*?\1[^>]*>(?P<alert>[^<]+)</div>',
                channel_page, 'alert', default=None, group='alert')
            if alert_message:
                raise ExtractorError('Youtube said: %s' % alert_message, expected=True)

        return self.playlist_result(self._entries(channel_page, channel_id), channel_id)
2618
2619
class YoutubeUserIE(YoutubeChannelIE):
    # Extractor for /user/<name>, /c/<name>, bare /<name> URLs and the
    # "ytuser:" keyword; the extraction logic is inherited from
    # YoutubeChannelIE and only the videos page URL is built differently.
    IE_NAME = 'youtube:user'
    IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:(?P<user>user|c)/)?(?!(?:attribution_link|watch|results|shared)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/%s/%s/videos'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheLinuxFoundation',
        'playlist_mincount': 320,
        'info_dict': {
            'id': 'UUfX55Sx5hEFjoC3cNs6mCUQ',
            'title': 'Uploads from The Linux Foundation',
        }
    }, {
        # Only available via https://www.youtube.com/c/12minuteathlete/videos
        # but not https://www.youtube.com/user/12minuteathlete/videos
        'url': 'https://www.youtube.com/c/12minuteathlete/videos',
        'playlist_mincount': 249,
        'info_dict': {
            'id': 'UUVjM-zV6_opMDx7WYxnjZiQ',
            'title': 'Uploads from 12 Minute Athlete',
        }
    }, {
        'url': 'ytuser:phihag',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/gametrailers',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/gametrailers',
        'only_matching': True,
    }, {
        # This channel is not available, geo restricted to JP
        'url': 'https://www.youtube.com/user/kananishinoSMEJ/videos',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Accept the URL only when no other Youtube*IE in this module does."""
        # This regex is too permissive and would match URLs that belong to
        # other youtube extractors, so give every other one a chance first.
        for name, klass in globals().items():
            if not (name.startswith('Youtube') and name.endswith('IE')):
                continue
            if klass is cls:
                continue
            if klass.suitable(url):
                return False
        return super(YoutubeUserIE, cls).suitable(url)

    def _build_template_url(self, url, channel_id):
        """Return the videos page URL, using 'user' when no path kind matched."""
        url_match = re.match(self._VALID_URL, url)
        path_kind = url_match.group('user') or 'user'
        return self._TEMPLATE_URL % (path_kind, url_match.group('id'))
2670
2671
class YoutubeLiveIE(YoutubeBaseInfoExtractor):
    # Extractor for ".../live" URLs of a user or channel: resolves to the
    # video advertised by the page, or falls back to the URL without "/live".
    IE_NAME = 'youtube:live'
    IE_DESC = 'YouTube.com live streams'
    _VALID_URL = r'(?P<base_url>https?://(?:\w+\.)?youtube\.com/(?:(?:user|channel|c)/)?(?P<id>[^/]+))/live'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheYoungTurks/live',
        'info_dict': {
            'id': 'a48o2S1cPoo',
            'ext': 'mp4',
            'title': 'The Young Turks - Live Main Show',
            'uploader': 'The Young Turks',
            'uploader_id': 'TheYoungTurks',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
            'upload_date': '20150715',
            'license': 'Standard YouTube License',
            'description': 'md5:438179573adcdff3c97ebb1ee632b891',
            'categories': ['News & Politics'],
            'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/TheYoungTurks/live',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a url_result for the live video, or for the base URL."""
        url_match = re.match(self._VALID_URL, url)
        channel_id = url_match.group('id')
        base_url = url_match.group('base_url')

        # The download is non-fatal: without a page, hand over the base URL
        webpage = self._download_webpage(url, channel_id, fatal=False)
        if not webpage:
            return self.url_result(base_url)

        page_type = self._og_search_property(
            'type', webpage, 'page type', default='')
        video_id = self._html_search_meta(
            'videoId', webpage, 'video id', default=None)
        is_video_page = page_type.startswith('video')
        # Only trust the advertised id when it looks like an 11-char video id
        if is_video_page and video_id and re.match(
                r'^[0-9A-Za-z_-]{11}$', video_id):
            return self.url_result(video_id, YoutubeIE.ie_key())
        return self.url_result(base_url)
2722
2723
class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
    # Extractor for the ".../playlists" page of a user or channel. This class
    # only declares metadata; it defines no methods of its own.
    IE_NAME = 'youtube:playlists'
    IE_DESC = 'YouTube.com user/channel playlists'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/(?:user|channel)/(?P<id>[^/]+)/playlists'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
        'playlist_mincount': 4,
        'info_dict': {
            'id': 'ThirstForScience',
            'title': 'Thirst for Science',
        },
    }, {
        # with "Load more" button
        'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
        'playlist_mincount': 70,
        'info_dict': {
            'id': 'igorkle1',
            'title': 'Игорь Клейнер',
        },
    }, {
        'url': 'https://www.youtube.com/channel/UCiU1dHvZObB2iP6xkJ__Icw/playlists',
        'playlist_mincount': 17,
        'info_dict': {
            'id': 'UCiU1dHvZObB2iP6xkJ__Icw',
            'title': 'Chem Player',
        },
    }]
2752
2753
class YoutubeSearchBaseInfoExtractor(YoutubePlaylistBaseInfoExtractor):
    # Shared base for the search extractors: overrides the video-link regex
    # so that it captures an 11-character id from a /watch?v= href and,
    # optionally, the title attribute that follows it.
    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})(?:[^"]*"[^>]+\btitle="(?P<title>[^"]+))?'
2756
2757
class YoutubeSearchIE(SearchInfoExtractor, YoutubeSearchBaseInfoExtractor):
    IE_DESC = 'YouTube.com searches'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    _EXTRA_QUERY_ARGS = {}
    _TESTS = []

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query

        Result pages are downloaded one after another until a page yields
        no videos, no "Next" link is found, or at least n videos have been
        collected. The playlist returned holds at most n entries.

        Raises ExtractorError when a page carries a "search-message" block.
        """

        videos = []

        url_query = {
            'search_query': query.encode('utf-8'),
        }
        url_query.update(self._EXTRA_QUERY_ARGS)
        result_url = 'https://www.youtube.com/results?' + compat_urllib_parse_urlencode(url_query)

        for pagenum in itertools.count(1):
            data = self._download_json(
                result_url, video_id='query "%s"' % query,
                note='Downloading page %s' % pagenum,
                errnote='Unable to download API page',
                query={'spf': 'navigate'})
            html_content = data[1]['body']['content']

            if 'class="search-message' in html_content:
                raise ExtractorError(
                    '[youtube] No video results', expected=True)

            new_videos = list(self._process_page(html_content))
            videos += new_videos
            # Stop as soon as enough results are available (>=, not >):
            # otherwise one more page is downloaded for nothing whenever
            # exactly n videos have been collected.
            if not new_videos or len(videos) >= n:
                break
            next_link = self._html_search_regex(
                r'href="(/results\?[^"]*\bsp=[^"]+)"[^>]*>\s*<span[^>]+class="[^"]*\byt-uix-button-content\b[^"]*"[^>]*>Next',
                html_content, 'next link', default=None)
            if next_link is None:
                break
            result_url = compat_urlparse.urljoin('https://www.youtube.com/', next_link)

        # n may be float('inf') (see _MAX_RESULTS), which cannot be used as a
        # slice bound, so only slice when there really are too many videos
        if len(videos) > n:
            videos = videos[:n]
        return self.playlist_result(videos, query)
2806
2807
class YoutubeSearchDateIE(YoutubeSearchIE):
    # Same search as YoutubeSearchIE, with an extra query argument asking
    # for results sorted by upload date.
    _SEARCH_KEY = 'ytsearchdate'
    _EXTRA_QUERY_ARGS = {'search_sort': 'video_date_uploaded'}
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    IE_DESC = 'YouTube.com searches, newest videos first'
2813
2814
class YoutubeSearchURLIE(YoutubeSearchBaseInfoExtractor):
    # Extractor for plain /results?search_query=... (or ?q=...) URLs: only
    # the page behind the given URL is processed.
    IE_NAME = 'youtube:search_url'
    IE_DESC = 'YouTube.com search URLs'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the videos of the results page, titled with the query."""
        raw_query = re.match(self._VALID_URL, url).group('query')
        query = compat_urllib_parse_unquote_plus(raw_query)
        results_page = self._download_webpage(url, query)
        entries = self._process_page(results_page)
        return self.playlist_result(entries, playlist_title=query)
2835
2836
class YoutubeShowIE(YoutubePlaylistsBaseInfoExtractor):
    # Extractor for /show/<id> URLs: rewrites the URL to the show's
    # "playlists" page and lets the base class extract it.
    IE_NAME = 'youtube:show'
    IE_DESC = 'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/show/(?P<id>[^?#]*)'
    _TESTS = [{
        'url': 'https://www.youtube.com/show/airdisasters',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'airdisasters',
            'title': 'Air Disasters',
        }
    }]

    def _real_extract(self, url):
        """Delegate to the base class with the show's playlists page URL."""
        show_id = self._match_id(url)
        playlists_url = 'https://www.youtube.com/show/%s/playlists' % show_id
        return super(YoutubeShowIE, self)._real_extract(playlists_url)
2854
2855
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
    """
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        """Extractor name derived from the feed name."""
        return 'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        self._login()

    def _entries(self, page):
        """Yield one result per new video id found in the feed.

        Starts with the ids found in page, then follows the "load more"
        links until a portion brings no unseen id or no further link exists.
        """
        # The extraction process is the same as for playlists, but the regex
        # for the video ids doesn't contain an index
        # A set keeps the "already seen" test cheap however long the feed gets
        seen_ids = set()
        more_widget_html = content_html = page
        for page_num in itertools.count(1):
            matches = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)

            # 'recommended' feed has infinite 'load more' and each new portion spins
            # the same videos in (sometimes) slightly different order, so we'll check
            # for unicity and break when portion has no new videos
            new_ids = [
                video_id for video_id in orderedSet(matches)
                if video_id not in seen_ids]
            if not new_ids:
                break

            seen_ids.update(new_ids)

            for entry in self._ids_to_results(new_ids):
                yield entry

            mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
            if not mobj:
                break

            more = self._download_json(
                'https://youtube.com/%s' % mobj.group('more'), self._PLAYLIST_TITLE,
                'Downloading page #%s' % page_num,
                transform_source=uppercase_escape)
            content_html = more['content_html']
            more_widget_html = more['load_more_widget_html']

    def _real_extract(self, url):
        """Download the feed page and return its videos as a playlist."""
        page = self._download_webpage(
            'https://www.youtube.com/feed/%s' % self._FEED_NAME,
            self._PLAYLIST_TITLE)
        return self.playlist_result(
            self._entries(page), playlist_title=self._PLAYLIST_TITLE)
2907
2908
class YoutubeWatchLaterIE(YoutubePlaylistIE):
    # Extractor for the "WL" (watch later) playlist, reachable through the
    # feed URL, a list=WL URL or the ":ytwatchlater" shortcut.
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:feed/watch_later|(?:playlist|watch)\?(?:.+&)?list=WL)|:ytwatchlater'

    _TESTS = [{
        'url': 'https://www.youtube.com/playlist?list=WL',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?v=bCNU9TrbiRk&index=1&list=WL',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the single requested video, or else the 'WL' playlist."""
        _video_id, single_video = self._check_download_just_video(url, 'WL')
        if single_video:
            return single_video
        _has_videos, watch_later = self._extract_playlist('WL')
        return watch_later
2928
2929
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    # Extractor for the favourites list: finds the playlist id on the
    # my_favorites page and hands it over to the playlist extractor.
    _LOGIN_REQUIRED = True
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/my_favorites|:ytfav(?:ou?rites)?'

    def _real_extract(self, url):
        """Return a url_result for the favourites playlist id."""
        favourites_page = self._download_webpage(
            'https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        favourites_list_id = self._search_regex(
            r'list=(.+?)["&]', favourites_page, 'favourites playlist id')
        return self.url_result(favourites_list_id, 'YoutubePlaylist')
2940
2941
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    # Feed extractor for /feed/recommended (shortcut ":ytrec")
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = 'Youtube Recommended videos'
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/recommended|:ytrec(?:ommended)?'
2947
2948
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    # Feed extractor for /feed/subscriptions (shortcut ":ytsubs")
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = 'Youtube Subscriptions'
    IE_DESC = 'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
2954
2955
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    # Feed extractor for /feed/history (shortcut ":ythistory")
    _FEED_NAME = 'history'
    _PLAYLIST_TITLE = 'Youtube History'
    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/history|:ythistory'
2961
2962
class YoutubeTruncatedURLIE(InfoExtractor):
    # Matches watch/attribution_link URLs that end right after a single
    # non-video query parameter (or with no parameter at all) and reports
    # them with a hint about quoting the URL instead of extracting anything.
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError explaining the likely cause."""
        quoting_hint = (
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like  youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply  youtube-dl BaW_jenozKc  .')
        raise ExtractorError(quoting_hint, expected=True)
3010
3011
class YoutubeTruncatedIDIE(InfoExtractor):
    # Matches watch URLs whose "v" value is only 1 to 10 characters long and
    # reports them as truncated instead of extracting anything.
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the short id."""
        truncated_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (truncated_id, url)
        raise ExtractorError(message, expected=True)