_ Git - youtube-dl/blob - youtube_dl/extractor/youtube.py

   1 # coding: utf-8
   2
   3 from __future__ import unicode_literals
   4
   5
   6 import itertools
   7 import json
   8 import os.path
   9 import random
  10 import re
  11 import time
  12 import traceback
  13
  14 from .common import InfoExtractor, SearchInfoExtractor
  15 from ..jsinterp import JSInterpreter
  16 from ..swfinterp import SWFInterpreter
  17 from ..compat import (
  18     compat_chr,
  19     compat_kwargs,
  20     compat_parse_qs,
  21     compat_urllib_parse_unquote,
  22     compat_urllib_parse_unquote_plus,
  23     compat_urllib_parse_urlencode,
  24     compat_urllib_parse_urlparse,
  25     compat_urlparse,
  26     compat_str,
  27 )
  28 from ..utils import (
  29     clean_html,
  30     error_to_compat_str,
  31     ExtractorError,
  32     float_or_none,
  33     get_element_by_attribute,
  34     get_element_by_id,
  35     int_or_none,
  36     mimetype2ext,
  37     orderedSet,
  38     parse_codecs,
  39     parse_duration,
  40     qualities,
  41     remove_quotes,
  42     remove_start,
  43     smuggle_url,
  44     str_to_int,
  45     try_get,
  46     unescapeHTML,
  47     unified_strdate,
  48     unsmuggle_url,
  49     uppercase_escape,
  50     urlencode_postdata,
  51 )
  52
  53
  54 class YoutubeBaseInfoExtractor(InfoExtractor):
  55     """Provide base functions for Youtube extractors"""
  56     _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
  57     _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'
  58
  59     _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
  60     _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
  61     _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'
  62
  63     _NETRC_MACHINE = 'youtube'
  64     # If True it will raise an error if no login info is provided
  65     _LOGIN_REQUIRED = False
  66
  67     _PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL)[0-9A-Za-z-_]{10,}'
  68
  69     def _set_language(self):
  70         self._set_cookie(
  71             '.youtube.com', 'PREF', 'f1=50000000&hl=en',
  72             # YouTube sets the expire time to about two months
  73             expire_time=time.time() + 2 * 30 * 24 * 3600)
  74
  75     def _ids_to_results(self, ids):
  76         return [
  77             self.url_result(vid_id, 'Youtube', video_id=vid_id)
  78             for vid_id in ids]
  79
  80     def _login(self):
  81         """
  82         Attempt to log in to YouTube.
  83         True is returned if successful or skipped.
  84         False is returned if login failed.
  85
  86         If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
  87         """
  88         username, password = self._get_login_info()
  89         # No authentication to be performed
  90         if username is None:
  91             if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
  92                 raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
  93             return True
  94
  95         login_page = self._download_webpage(
  96             self._LOGIN_URL, None,
  97             note='Downloading login page',
  98             errnote='unable to fetch login page', fatal=False)
  99         if login_page is False:
 100             return
 101
 102         login_form = self._hidden_inputs(login_page)
 103
 104         def req(url, f_req, note, errnote):
 105             data = login_form.copy()
 106             data.update({
 107                 'pstMsg': 1,
 108                 'checkConnection': 'youtube',
 109                 'checkedDomains': 'youtube',
 110                 'hl': 'en',
 111                 'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
 112                 'f.req': json.dumps(f_req),
 113                 'flowName': 'GlifWebSignIn',
 114                 'flowEntry': 'ServiceLogin',
 115             })
 116             return self._download_json(
 117                 url, None, note=note, errnote=errnote,
 118                 transform_source=lambda s: re.sub(r'^[^[]*', '', s),
 119                 fatal=False,
 120                 data=urlencode_postdata(data), headers={
 121                     'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
 122                     'Google-Accounts-XSRF': 1,
 123                 })
 124
 125         def warn(message):
 126             self._downloader.report_warning(message)
 127
 128         lookup_req = [
 129             username,
 130             None, [], None, 'US', None, None, 2, False, True,
 131             [
 132                 None, None,
 133                 [2, 1, None, 1,
 134                  'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
 135                  None, [], 4],
 136                 1, [None, None, []], None, None, None, True
 137             ],
 138             username,
 139         ]
 140
 141         lookup_results = req(
 142             self._LOOKUP_URL, lookup_req,
 143             'Looking up account info', 'Unable to look up account info')
 144
 145         if lookup_results is False:
 146             return False
 147
 148         user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
 149         if not user_hash:
 150             warn('Unable to extract user hash')
 151             return False
 152
 153         challenge_req = [
 154             user_hash,
 155             None, 1, None, [1, None, None, None, [password, None, True]],
 156             [
 157                 None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
 158                 1, [None, None, []], None, None, None, True
 159             ]]
 160
 161         challenge_results = req(
 162             self._CHALLENGE_URL, challenge_req,
 163             'Logging in', 'Unable to log in')
 164
 165         if challenge_results is False:
 166             return
 167
 168         login_res = try_get(challenge_results, lambda x: x[0][5], list)
 169         if login_res:
 170             login_msg = try_get(login_res, lambda x: x[5], compat_str)
 171             warn(
 172                 'Unable to login: %s' % 'Invalid password'
 173                 if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
 174             return False
 175
 176         res = try_get(challenge_results, lambda x: x[0][-1], list)
 177         if not res:
 178             warn('Unable to extract result entry')
 179             return False
 180
 181         tfa = try_get(res, lambda x: x[0][0], list)
 182         if tfa:
 183             tfa_str = try_get(tfa, lambda x: x[2], compat_str)
 184             if tfa_str == 'TWO_STEP_VERIFICATION':
 185                 # SEND_SUCCESS - TFA code has been successfully sent to phone
 186                 # QUOTA_EXCEEDED - reached the limit of TFA codes
 187                 status = try_get(tfa, lambda x: x[5], compat_str)
 188                 if status == 'QUOTA_EXCEEDED':
 189                     warn('Exceeded the limit of TFA codes, try later')
 190                     return False
 191
 192                 tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
 193                 if not tl:
 194                     warn('Unable to extract TL')
 195                     return False
 196
 197                 tfa_code = self._get_tfa_info('2-step verification code')
 198
 199                 if not tfa_code:
 200                     warn(
 201                         'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
 202                         '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
 203                     return False
 204
 205                 tfa_code = remove_start(tfa_code, 'G-')
 206
 207                 tfa_req = [
 208                     user_hash, None, 2, None,
 209                     [
 210                         9, None, None, None, None, None, None, None,
 211                         [None, tfa_code, True, 2]
 212                     ]]
 213
 214                 tfa_results = req(
 215                     self._TFA_URL.format(tl), tfa_req,
 216                     'Submitting TFA code', 'Unable to submit TFA code')
 217
 218                 if tfa_results is False:
 219                     return False
 220
 221                 tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
 222                 if tfa_res:
 223                     tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
 224                     warn(
 225                         'Unable to finish TFA: %s' % 'Invalid TFA code'
 226                         if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
 227                     return False
 228
 229                 check_cookie_url = try_get(
 230                     tfa_results, lambda x: x[0][-1][2], compat_str)
 231         else:
 232             check_cookie_url = try_get(res, lambda x: x[2], compat_str)
 233
 234         if not check_cookie_url:
 235             warn('Unable to extract CheckCookie URL')
 236             return False
 237
 238         check_cookie_results = self._download_webpage(
 239             check_cookie_url, None, 'Checking cookie', fatal=False)
 240
 241         if check_cookie_results is False:
 242             return False
 243
 244         if 'https://myaccount.google.com/' not in check_cookie_results:
 245             warn('Unable to log in')
 246             return False
 247
 248         return True
 249
 250     def _download_webpage_handle(self, *args, **kwargs):
 251         kwargs.setdefault('query', {})['disable_polymer'] = 'true'
 252         return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
 253             *args, **compat_kwargs(kwargs))
 254
 255     def _real_initialize(self):
 256         if self._downloader is None:
 257             return
 258         self._set_language()
 259         if not self._login():
 260             return
 261
 262
 263 class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
 264     # Extract entries from page with "Load more" button
 265     def _entries(self, page, playlist_id):
 266         more_widget_html = content_html = page
 267         for page_num in itertools.count(1):
 268             for entry in self._process_page(content_html):
 269                 yield entry
 270
 271             mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
 272             if not mobj:
 273                 break
 274
 275             more = self._download_json(
 276                 'https://youtube.com/%s' % mobj.group('more'), playlist_id,
 277                 'Downloading page #%s' % page_num,
 278                 transform_source=uppercase_escape)
 279             content_html = more['content_html']
 280             if not content_html.strip():
 281                 # Some webpages show a "Load more" button but they don't
 282                 # have more videos
 283                 break
 284             more_widget_html = more['load_more_widget_html']
 285
 286
 287 class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
 288     def _process_page(self, content):
 289         for video_id, video_title in self.extract_videos_from_page(content):
 290             yield self.url_result(video_id, 'Youtube', video_id, video_title)
 291
 292     def extract_videos_from_page(self, page):
 293         ids_in_page = []
 294         titles_in_page = []
 295         for mobj in re.finditer(self._VIDEO_RE, page):
 296             # The link with index 0 is not the first video of the playlist (not sure if still actual)
 297             if 'index' in mobj.groupdict() and mobj.group('id') == '0':
 298                 continue
 299             video_id = mobj.group('id')
 300             video_title = unescapeHTML(mobj.group('title'))
 301             if video_title:
 302                 video_title = video_title.strip()
 303             try:
 304                 idx = ids_in_page.index(video_id)
 305                 if video_title and not titles_in_page[idx]:
 306                     titles_in_page[idx] = video_title
 307             except ValueError:
 308                 ids_in_page.append(video_id)
 309                 titles_in_page.append(video_title)
 310         return zip(ids_in_page, titles_in_page)
 311
 312
 313 class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
 314     def _process_page(self, content):
 315         for playlist_id in orderedSet(re.findall(
 316                 r'<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*><a[^>]+href="/?playlist\?list=([0-9A-Za-z-_]{10,})"',
 317                 content)):
 318             yield self.url_result(
 319                 'https://www.youtube.com/playlist?list=%s' % playlist_id, 'YoutubePlaylist')
 320
 321     def _real_extract(self, url):
 322         playlist_id = self._match_id(url)
 323         webpage = self._download_webpage(url, playlist_id)
 324         title = self._og_search_title(webpage, fatal=False)
 325         return self.playlist_result(self._entries(webpage, playlist_id), playlist_id, title)
 326
 327
 328 class YoutubeIE(YoutubeBaseInfoExtractor):
 329     IE_DESC = 'YouTube.com'
 330     _VALID_URL = r"""(?x)^
 331                      (
 332                          (?:https?://|//)                                    # http(s):// or protocol-independent URL
 333                          (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|
 334                             (?:www\.)?deturl\.com/www\.youtube\.com/|
 335                             (?:www\.)?pwnyoutube\.com/|
 336                             (?:www\.)?hooktube\.com/|
 337                             (?:www\.)?yourepeat\.com/|
 338                             tube\.majestyc\.net/|
 339                             youtube\.googleapis\.com/)                        # the various hostnames, with wildcard subdomains
 340                          (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
 341                          (?:                                                  # the various things that can precede the ID:
 342                              (?:(?:v|embed|e)/(?!videoseries))                # v/ or embed/ or e/
 343                              |(?:                                             # or the v= param in all its forms
 344                                  (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)?  # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
 345                                  (?:\?|\#!?)                                  # the params delimiter ? or # or #!
 346                                  (?:.*?[&;])??                                # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
 347                                  v=
 348                              )
 349                          ))
 350                          |(?:
 351                             youtu\.be|                                        # just youtu.be/xxxx
 352                             vid\.plus|                                        # or vid.plus/xxxx
 353                             zwearz\.com/watch|                                # or zwearz.com/watch/xxxx
 354                          )/
 355                          |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
 356                          )
 357                      )?                                                       # all until now is optional -> you can pass the naked ID
 358                      ([0-9A-Za-z_-]{11})                                      # here is it! the YouTube video ID
 359                      (?!.*?\blist=
 360                         (?:
 361                             %(playlist_id)s|                                  # combined list/video URLs are handled by the playlist IE
 362                             WL                                                # WL are handled by the watch later IE
 363                         )
 364                      )
 365                      (?(1).+)?                                                # if we found the ID, everything can follow
 366                      $""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
 367     _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
 368     _formats = {
 369         '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
 370         '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
 371         '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
 372         '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
 373         '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
 374         '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
 375         '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 376         '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 377         # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
 378         '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
 379         '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
 380         '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
 381         '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
 382         '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
 383         '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
 384         '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
 385         '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 386         '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 387
 388
 389         # 3D videos
 390         '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
 391         '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
 392         '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
 393         '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
 394         '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
 395         '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
 396         '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
 397
 398         # Apple HTTP Live Streaming
 399         '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
 400         '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
 401         '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
 402         '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
 403         '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
 404         '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
 405         '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
 406         '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
 407
 408         # DASH mp4 video
 409         '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
 410         '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
 411         '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
 412         '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
 413         '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
 414         '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'},  # Height can vary (https://github.com/rg3/youtube-dl/issues/4559)
 415         '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
 416         '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
 417         '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
 418         '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
 419         '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
 420         '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
 421
 422         # Dash mp4 audio
 423         '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
 424         '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
 425         '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
 426         '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
 427         '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
 428         '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
 429         '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
 430
 431         # Dash webm
 432         '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 433         '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 434         '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 435         '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 436         '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 437         '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 438         '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
 439         '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 440         '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 441         '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 442         '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 443         '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 444         '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 445         '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 446         '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 447         # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
 448         '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 449         '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
 450         '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
 451         '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
 452         '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 453         '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
 454
 455         # Dash webm audio
 456         '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
 457         '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
 458
 459         # Dash webm audio with opus inside
 460         '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
 461         '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
 462         '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
 463
 464         # RTMP (unnamed)
 465         '_rtmp': {'protocol': 'rtmp'},
 466     }
 467     _SUBTITLE_FORMATS = ('ttml', 'vtt')
 468
 469     _GEO_BYPASS = False
 470
 471     IE_NAME = 'youtube'
 472     _TESTS = [
 473         {
 474             'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
 475             'info_dict': {
 476                 'id': 'BaW_jenozKc',
 477                 'ext': 'mp4',
 478                 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
 479                 'uploader': 'Philipp Hagemeister',
 480                 'uploader_id': 'phihag',
 481                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
 482                 'upload_date': '20121002',
 483                 'license': 'Standard YouTube License',
 484                 'description': 'test chars:  "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
 485                 'categories': ['Science & Technology'],
 486                 'tags': ['youtube-dl'],
 487                 'duration': 10,
 488                 'like_count': int,
 489                 'dislike_count': int,
 490                 'start_time': 1,
 491                 'end_time': 9,
 492             }
 493         },
 494         {
 495             'url': 'https://www.youtube.com/watch?v=UxxajLWwzqY',
 496             'note': 'Test generic use_cipher_signature video (#897)',
 497             'info_dict': {
 498                 'id': 'UxxajLWwzqY',
 499                 'ext': 'mp4',
 500                 'upload_date': '20120506',
 501                 'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
 502                 'alt_title': 'I Love It (feat. Charli XCX)',
 503                 'description': 'md5:f3ceb5ef83a08d95b9d146f973157cc8',
 504                 'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
 505                          'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
 506                          'iconic ep', 'iconic', 'love', 'it'],
 507                 'duration': 180,
 508                 'uploader': 'Icona Pop',
 509                 'uploader_id': 'IconaPop',
 510                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IconaPop',
 511                 'license': 'Standard YouTube License',
 512                 'creator': 'Icona Pop',
 513                 'track': 'I Love It (feat. Charli XCX)',
 514                 'artist': 'Icona Pop',
 515             }
 516         },
 517         {
 518             'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ',
 519             'note': 'Test VEVO video with age protection (#956)',
 520             'info_dict': {
 521                 'id': '07FYdnEawAQ',
 522                 'ext': 'mp4',
 523                 'upload_date': '20130703',
 524                 'title': 'Justin Timberlake - Tunnel Vision (Explicit)',
 525                 'alt_title': 'Tunnel Vision',
 526                 'description': 'md5:64249768eec3bc4276236606ea996373',
 527                 'duration': 419,
 528                 'uploader': 'justintimberlakeVEVO',
 529                 'uploader_id': 'justintimberlakeVEVO',
 530                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/justintimberlakeVEVO',
 531                 'license': 'Standard YouTube License',
 532                 'creator': 'Justin Timberlake',
 533                 'track': 'Tunnel Vision`',
 534                 'artist': 'Justin Timberlake',
 535                 'age_limit': 18,
 536             }
 537         },
 538         {
 539             'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
 540             'note': 'Embed-only video (#1746)',
 541             'info_dict': {
 542                 'id': 'yZIXLfi8CZQ',
 543                 'ext': 'mp4',
 544                 'upload_date': '20120608',
 545                 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
 546                 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
 547                 'uploader': 'SET India',
 548                 'uploader_id': 'setindia',
 549                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
 550                 'license': 'Standard YouTube License',
 551                 'age_limit': 18,
 552             }
 553         },
 554         {
 555             'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=UxxajLWwzqY',
 556             'note': 'Use the first video ID in the URL',
 557             'info_dict': {
 558                 'id': 'BaW_jenozKc',
 559                 'ext': 'mp4',
 560                 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
 561                 'uploader': 'Philipp Hagemeister',
 562                 'uploader_id': 'phihag',
 563                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
 564                 'upload_date': '20121002',
 565                 'license': 'Standard YouTube License',
 566                 'description': 'test chars:  "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
 567                 'categories': ['Science & Technology'],
 568                 'tags': ['youtube-dl'],
 569                 'duration': 10,
 570                 'like_count': int,
 571                 'dislike_count': int,
 572             },
 573             'params': {
 574                 'skip_download': True,
 575             },
 576         },
 577         {
 578             'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
 579             'note': '256k DASH audio (format 141) via DASH manifest',
 580             'info_dict': {
 581                 'id': 'a9LDPn-MO4I',
 582                 'ext': 'm4a',
 583                 'upload_date': '20121002',
 584                 'uploader_id': '8KVIDEO',
 585                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
 586                 'description': '',
 587                 'uploader': '8KVIDEO',
 588                 'license': 'Standard YouTube License',
 589                 'title': 'UHDTV TEST 8K VIDEO.mp4'
 590             },
 591             'params': {
 592                 'youtube_include_dash_manifest': True,
 593                 'format': '141',
 594             },
 595             'skip': 'format 141 not served anymore',
 596         },
 597         # DASH manifest with encrypted signature
 598         {
 599             'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
 600             'info_dict': {
 601                 'id': 'IB3lcPjvWLA',
 602                 'ext': 'm4a',
 603                 'title': 'Afrojack, Spree Wilson - The Spark ft. Spree Wilson',
 604                 'description': 'md5:12e7067fa6735a77bdcbb58cb1187d2d',
 605                 'duration': 244,
 606                 'uploader': 'AfrojackVEVO',
 607                 'uploader_id': 'AfrojackVEVO',
 608                 'upload_date': '20131011',
 609                 'license': 'Standard YouTube License',
 610             },
 611             'params': {
 612                 'youtube_include_dash_manifest': True,
 613                 'format': '141/bestaudio[ext=m4a]',
 614             },
 615         },
 616         # JS player signature function name containing $
 617         {
 618             'url': 'https://www.youtube.com/watch?v=nfWlot6h_JM',
 619             'info_dict': {
 620                 'id': 'nfWlot6h_JM',
 621                 'ext': 'm4a',
 622                 'title': 'Taylor Swift - Shake It Off',
 623                 'alt_title': 'Shake It Off',
 624                 'description': 'md5:95f66187cd7c8b2c13eb78e1223b63c3',
 625                 'duration': 242,
 626                 'uploader': 'TaylorSwiftVEVO',
 627                 'uploader_id': 'TaylorSwiftVEVO',
 628                 'upload_date': '20140818',
 629                 'license': 'Standard YouTube License',
 630                 'creator': 'Taylor Swift',
 631             },
 632             'params': {
 633                 'youtube_include_dash_manifest': True,
 634                 'format': '141/bestaudio[ext=m4a]',
 635             },
 636         },
 637         # Controversy video
 638         {
 639             'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
 640             'info_dict': {
 641                 'id': 'T4XJQO3qol8',
 642                 'ext': 'mp4',
 643                 'duration': 219,
 644                 'upload_date': '20100909',
 645                 'uploader': 'The Amazing Atheist',
 646                 'uploader_id': 'TheAmazingAtheist',
 647                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
 648                 'license': 'Standard YouTube License',
 649                 'title': 'Burning Everyone\'s Koran',
 650                 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
 651             }
 652         },
 653         # Normal age-gate video (No vevo, embed allowed)
 654         {
 655             'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
 656             'info_dict': {
 657                 'id': 'HtVdAasjOgU',
 658                 'ext': 'mp4',
 659                 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
 660                 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
 661                 'duration': 142,
 662                 'uploader': 'The Witcher',
 663                 'uploader_id': 'WitcherGame',
 664                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
 665                 'upload_date': '20140605',
 666                 'license': 'Standard YouTube License',
 667                 'age_limit': 18,
 668             },
 669         },
 670         # Age-gate video with encrypted signature
 671         {
 672             'url': 'https://www.youtube.com/watch?v=6kLq3WMV1nU',
 673             'info_dict': {
 674                 'id': '6kLq3WMV1nU',
 675                 'ext': 'mp4',
 676                 'title': 'Dedication To My Ex (Miss That) (Lyric Video)',
 677                 'description': 'md5:33765bb339e1b47e7e72b5490139bb41',
 678                 'duration': 247,
 679                 'uploader': 'LloydVEVO',
 680                 'uploader_id': 'LloydVEVO',
 681                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/LloydVEVO',
 682                 'upload_date': '20110629',
 683                 'license': 'Standard YouTube License',
 684                 'age_limit': 18,
 685             },
 686         },
 687         # video_info is None (https://github.com/rg3/youtube-dl/issues/4421)
 688         # YouTube Red ad is not captured for creator
 689         {
 690             'url': '__2ABJjxzNo',
 691             'info_dict': {
 692                 'id': '__2ABJjxzNo',
 693                 'ext': 'mp4',
 694                 'duration': 266,
 695                 'upload_date': '20100430',
 696                 'uploader_id': 'deadmau5',
 697                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
 698                 'creator': 'deadmau5',
 699                 'description': 'md5:12c56784b8032162bb936a5f76d55360',
 700                 'uploader': 'deadmau5',
 701                 'license': 'Standard YouTube License',
 702                 'title': 'Deadmau5 - Some Chords (HD)',
 703                 'alt_title': 'Some Chords',
 704             },
 705             'expected_warnings': [
 706                 'DASH manifest missing',
 707             ]
 708         },
 709         # Olympics (https://github.com/rg3/youtube-dl/issues/4431)
 710         {
 711             'url': 'lqQg6PlCWgI',
 712             'info_dict': {
 713                 'id': 'lqQg6PlCWgI',
 714                 'ext': 'mp4',
 715                 'duration': 6085,
 716                 'upload_date': '20150827',
 717                 'uploader_id': 'olympic',
 718                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
 719                 'license': 'Standard YouTube License',
 720                 'description': 'HO09  - Women -  GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
 721                 'uploader': 'Olympic',
 722                 'title': 'Hockey - Women -  GER-AUS - London 2012 Olympic Games',
 723             },
 724             'params': {
 725                 'skip_download': 'requires avconv',
 726             }
 727         },
 728         # Non-square pixels
 729         {
 730             'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
 731             'info_dict': {
 732                 'id': '_b-2C3KPAM0',
 733                 'ext': 'mp4',
 734                 'stretched_ratio': 16 / 9.,
 735                 'duration': 85,
 736                 'upload_date': '20110310',
 737                 'uploader_id': 'AllenMeow',
 738                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
 739                 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
 740                 'uploader': '孫艾倫',
 741                 'license': 'Standard YouTube License',
 742                 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
 743             },
 744         },
 745         # url_encoded_fmt_stream_map is empty string
 746         {
 747             'url': 'qEJwOuvDf7I',
 748             'info_dict': {
 749                 'id': 'qEJwOuvDf7I',
 750                 'ext': 'webm',
 751                 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
 752                 'description': '',
 753                 'upload_date': '20150404',
 754                 'uploader_id': 'spbelect',
 755                 'uploader': 'Наблюдатели Петербурга',
 756             },
 757             'params': {
 758                 'skip_download': 'requires avconv',
 759             },
 760             'skip': 'This live event has ended.',
 761         },
 762         # Extraction from multiple DASH manifests (https://github.com/rg3/youtube-dl/pull/6097)
 763         {
 764             'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
 765             'info_dict': {
 766                 'id': 'FIl7x6_3R5Y',
 767                 'ext': 'mp4',
 768                 'title': 'md5:7b81415841e02ecd4313668cde88737a',
 769                 'description': 'md5:116377fd2963b81ec4ce64b542173306',
 770                 'duration': 220,
 771                 'upload_date': '20150625',
 772                 'uploader_id': 'dorappi2000',
 773                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
 774                 'uploader': 'dorappi2000',
 775                 'license': 'Standard YouTube License',
 776                 'formats': 'mincount:32',
 777             },
 778         },
 779         # DASH manifest with segment_list
 780         {
 781             'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
 782             'md5': '8ce563a1d667b599d21064e982ab9e31',
 783             'info_dict': {
 784                 'id': 'CsmdDsKjzN8',
 785                 'ext': 'mp4',
 786                 'upload_date': '20150501',  # According to '<meta itemprop="datePublished"', but in other places it's 20150510
 787                 'uploader': 'Airtek',
 788                 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
 789                 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
 790                 'license': 'Standard YouTube License',
 791                 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
 792             },
 793             'params': {
 794                 'youtube_include_dash_manifest': True,
 795                 'format': '135',  # bestvideo
 796             },
 797             'skip': 'This live event has ended.',
 798         },
 799         {
 800             # Multifeed videos (multiple cameras), URL is for Main Camera
 801             'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
 802             'info_dict': {
 803                 'id': 'jqWvoWXjCVs',
 804                 'title': 'teamPGP: Rocket League Noob Stream',
 805                 'description': 'md5:dc7872fb300e143831327f1bae3af010',
 806             },
 807             'playlist': [{
 808                 'info_dict': {
 809                     'id': 'jqWvoWXjCVs',
 810                     'ext': 'mp4',
 811                     'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
 812                     'description': 'md5:dc7872fb300e143831327f1bae3af010',
 813                     'duration': 7335,
 814                     'upload_date': '20150721',
 815                     'uploader': 'Beer Games Beer',
 816                     'uploader_id': 'beergamesbeer',
 817                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
 818                     'license': 'Standard YouTube License',
 819                 },
 820             }, {
 821                 'info_dict': {
 822                     'id': '6h8e8xoXJzg',
 823                     'ext': 'mp4',
 824                     'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
 825                     'description': 'md5:dc7872fb300e143831327f1bae3af010',
 826                     'duration': 7337,
 827                     'upload_date': '20150721',
 828                     'uploader': 'Beer Games Beer',
 829                     'uploader_id': 'beergamesbeer',
 830                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
 831                     'license': 'Standard YouTube License',
 832                 },
 833             }, {
 834                 'info_dict': {
 835                     'id': 'PUOgX5z9xZw',
 836                     'ext': 'mp4',
 837                     'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
 838                     'description': 'md5:dc7872fb300e143831327f1bae3af010',
 839                     'duration': 7337,
 840                     'upload_date': '20150721',
 841                     'uploader': 'Beer Games Beer',
 842                     'uploader_id': 'beergamesbeer',
 843                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
 844                     'license': 'Standard YouTube License',
 845                 },
 846             }, {
 847                 'info_dict': {
 848                     'id': 'teuwxikvS5k',
 849                     'ext': 'mp4',
 850                     'title': 'teamPGP: Rocket League Noob Stream (zim)',
 851                     'description': 'md5:dc7872fb300e143831327f1bae3af010',
 852                     'duration': 7334,
 853                     'upload_date': '20150721',
 854                     'uploader': 'Beer Games Beer',
 855                     'uploader_id': 'beergamesbeer',
 856                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
 857                     'license': 'Standard YouTube License',
 858                 },
 859             }],
 860             'params': {
 861                 'skip_download': True,
 862             },
 863         },
 864         {
 865             # Multifeed video with comma in title (see https://github.com/rg3/youtube-dl/issues/8536)
 866             'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
 867             'info_dict': {
 868                 'id': 'gVfLd0zydlo',
 869                 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
 870             },
 871             'playlist_count': 2,
 872             'skip': 'Not multifeed anymore',
 873         },
 874         {
 875             'url': 'https://vid.plus/FlRa-iH7PGw',
 876             'only_matching': True,
 877         },
 878         {
 879             'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
 880             'only_matching': True,
 881         },
 882         {
 883             # Title with JS-like syntax "};" (see https://github.com/rg3/youtube-dl/issues/7468)
 884             # Also tests cut-off URL expansion in video description (see
 885             # https://github.com/rg3/youtube-dl/issues/1892,
 886             # https://github.com/rg3/youtube-dl/issues/8164)
 887             'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
 888             'info_dict': {
 889                 'id': 'lsguqyKfVQg',
 890                 'ext': 'mp4',
 891                 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
 892                 'alt_title': 'Dark Walk',
 893                 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
 894                 'duration': 133,
 895                 'upload_date': '20151119',
 896                 'uploader_id': 'IronSoulElf',
 897                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
 898                 'uploader': 'IronSoulElf',
 899                 'license': 'Standard YouTube License',
 900                 'creator': 'Todd Haberman, Daniel Law Heath & Aaron Kaplan',
 901             },
 902             'params': {
 903                 'skip_download': True,
 904             },
 905         },
 906         {
 907             # Tags with '};' (see https://github.com/rg3/youtube-dl/issues/7468)
 908             'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
 909             'only_matching': True,
 910         },
 911         {
 912             # Video with yt:stretch=17:0
 913             'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
 914             'info_dict': {
 915                 'id': 'Q39EVAstoRM',
 916                 'ext': 'mp4',
 917                 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
 918                 'description': 'md5:ee18a25c350637c8faff806845bddee9',
 919                 'upload_date': '20151107',
 920                 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
 921                 'uploader': 'CH GAMER DROID',
 922             },
 923             'params': {
 924                 'skip_download': True,
 925             },
 926             'skip': 'This video does not exist.',
 927         },
 928         {
 929             # Video licensed under Creative Commons
 930             'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
 931             'info_dict': {
 932                 'id': 'M4gD1WSo5mA',
 933                 'ext': 'mp4',
 934                 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
 935                 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
 936                 'duration': 721,
 937                 'upload_date': '20150127',
 938                 'uploader_id': 'BerkmanCenter',
 939                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
 940                 'uploader': 'The Berkman Klein Center for Internet & Society',
 941                 'license': 'Creative Commons Attribution license (reuse allowed)',
 942             },
 943             'params': {
 944                 'skip_download': True,
 945             },
 946         },
 947         {
 948             # Channel-like uploader_url
 949             'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
 950             'info_dict': {
 951                 'id': 'eQcmzGIKrzg',
 952                 'ext': 'mp4',
 953                 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
 954                 'description': 'md5:dda0d780d5a6e120758d1711d062a867',
 955                 'duration': 4060,
 956                 'upload_date': '20151119',
 957                 'uploader': 'Bernie 2016',
 958                 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
 959                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
 960                 'license': 'Creative Commons Attribution license (reuse allowed)',
 961             },
 962             'params': {
 963                 'skip_download': True,
 964             },
 965         },
 966         {
 967             'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
 968             'only_matching': True,
 969         },
 970         {
 971             # YouTube Red paid video (https://github.com/rg3/youtube-dl/issues/10059)
 972             'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
 973             'only_matching': True,
 974         },
 975         {
 976             # Rental video preview
 977             'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
 978             'info_dict': {
 979                 'id': 'uGpuVWrhIzE',
 980                 'ext': 'mp4',
 981                 'title': 'Piku - Trailer',
 982                 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
 983                 'upload_date': '20150811',
 984                 'uploader': 'FlixMatrix',
 985                 'uploader_id': 'FlixMatrixKaravan',
 986                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
 987                 'license': 'Standard YouTube License',
 988             },
 989             'params': {
 990                 'skip_download': True,
 991             },
 992         },
 993         {
 994             # YouTube Red video with episode data
 995             'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
 996             'info_dict': {
 997                 'id': 'iqKdEhx-dD4',
 998                 'ext': 'mp4',
 999                 'title': 'Isolation - Mind Field (Ep 1)',
1000                 'description': 'md5:8013b7ddea787342608f63a13ddc9492',
1001                 'duration': 2085,
1002                 'upload_date': '20170118',
1003                 'uploader': 'Vsauce',
1004                 'uploader_id': 'Vsauce',
1005                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
1006                 'license': 'Standard YouTube License',
1007                 'series': 'Mind Field',
1008                 'season_number': 1,
1009                 'episode_number': 1,
1010             },
1011             'params': {
1012                 'skip_download': True,
1013             },
1014             'expected_warnings': [
1015                 'Skipping DASH manifest',
1016             ],
1017         },
1018         {
1019             # The following content has been identified by the YouTube community
1020             # as inappropriate or offensive to some audiences.
1021             'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1022             'info_dict': {
1023                 'id': '6SJNVb0GnPI',
1024                 'ext': 'mp4',
1025                 'title': 'Race Differences in Intelligence',
1026                 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1027                 'duration': 965,
1028                 'upload_date': '20140124',
1029                 'uploader': 'New Century Foundation',
1030                 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1031                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1032                 'license': 'Standard YouTube License',
1033                 'view_count': int,
1034             },
1035             'params': {
1036                 'skip_download': True,
1037             },
1038         },
1039         {
1040             # itag 212
1041             'url': '1t24XAntNCY',
1042             'only_matching': True,
1043         },
1044         {
1045             # geo restricted to JP
1046             'url': 'sJL6WA-aGkQ',
1047             'only_matching': True,
1048         },
1049         {
1050             'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
1051             'only_matching': True,
1052         },
1053     ]
1054
    def __init__(self, *args, **kwargs):
        """Initialise the extractor and its in-memory signature function cache."""
        super(YoutubeIE, self).__init__(*args, **kwargs)
        # Maps (player_url, signature cache id) to a deciphering function;
        # populated lazily by _decrypt_signature.
        self._player_cache = {}
1058
1059     def report_video_info_webpage_download(self, video_id):
1060         """Report attempt to download video info webpage."""
1061         self.to_screen('%s: Downloading video info webpage' % video_id)
1062
1063     def report_information_extraction(self, video_id):
1064         """Report attempt to extract video information."""
1065         self.to_screen('%s: Extracting video information' % video_id)
1066
1067     def report_unavailable_format(self, video_id, format):
1068         """Report extracted video URL."""
1069         self.to_screen('%s: Format %s not available' % (video_id, format))
1070
1071     def report_rtmp_download(self):
1072         """Indicate the download will use the RTMP protocol."""
1073         self.to_screen('RTMP download detected')
1074
1075     def _signature_cache_id(self, example_sig):
1076         """ Return a string representation of a signature """
1077         return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
1078
1079     def _extract_signature_function(self, video_id, player_url, example_sig):
1080         id_m = re.match(
1081             r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|(?:/[a-z]{2}_[A-Z]{2})?/base)?\.(?P<ext>[a-z]+)$',
1082             player_url)
1083         if not id_m:
1084             raise ExtractorError('Cannot identify player %r' % player_url)
1085         player_type = id_m.group('ext')
1086         player_id = id_m.group('id')
1087
1088         # Read from filesystem cache
1089         func_id = '%s_%s_%s' % (
1090             player_type, player_id, self._signature_cache_id(example_sig))
1091         assert os.path.basename(func_id) == func_id
1092
1093         cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
1094         if cache_spec is not None:
1095             return lambda s: ''.join(s[i] for i in cache_spec)
1096
1097         download_note = (
1098             'Downloading player %s' % player_url
1099             if self._downloader.params.get('verbose') else
1100             'Downloading %s player %s' % (player_type, player_id)
1101         )
1102         if player_type == 'js':
1103             code = self._download_webpage(
1104                 player_url, video_id,
1105                 note=download_note,
1106                 errnote='Download of %s failed' % player_url)
1107             res = self._parse_sig_js(code)
1108         elif player_type == 'swf':
1109             urlh = self._request_webpage(
1110                 player_url, video_id,
1111                 note=download_note,
1112                 errnote='Download of %s failed' % player_url)
1113             code = urlh.read()
1114             res = self._parse_sig_swf(code)
1115         else:
1116             assert False, 'Invalid player type %r' % player_type
1117
1118         test_string = ''.join(map(compat_chr, range(len(example_sig))))
1119         cache_res = res(test_string)
1120         cache_spec = [ord(c) for c in cache_res]
1121
1122         self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
1123         return res
1124
    def _print_sig_code(self, func, example_sig):
        """Print Python code equivalent to the deciphering function func.

        func is applied to a probe string as long as example_sig; the
        positions it picks are turned into an expression built from
        indexing and slicing of 's', which is wrapped in an 'if' on the
        lengths of the dot-separated signature parts and shown with
        to_screen.
        """
        def gen_sig_code(idxs):
            # Yield one expression fragment per run of indices: a run of
            # consecutive indices (step 1 or -1) becomes a slice, any other
            # index becomes a plain 's[n]' lookup.
            # NOTE(review): with fewer than two indices the loop below never
            # binds 'i', so the final yield would fail - confirm callers
            # never pass such a short signature.
            def _genslice(start, end, step):
                # 'end' is the last index included in the run; the slice
                # bound is one step past it, or left open when that would
                # be negative (a descending run reaching index 0).
                starts = '' if start == 0 else str(start)
                ends = (':%d' % (end + step)) if end + step >= 0 else ':'
                steps = '' if step == 1 else (':%d' % step)
                return 's[%s%s%s]' % (starts, ends, steps)

            step = None
            # Squelch pyflakes warnings - start will be set when step is set
            start = '(Never used)'
            for i, prev in zip(idxs[1:], idxs[:-1]):
                if step is not None:
                    if i - prev == step:
                        # Still inside the current run
                        continue
                    # Run ended at 'prev': emit it and start afresh
                    yield _genslice(start, prev, step)
                    step = None
                    continue
                if i - prev in [-1, 1]:
                    # 'prev' and 'i' open a new run
                    step = i - prev
                    start = prev
                    continue
                else:
                    yield 's[%d]' % prev
            # Emit whatever is still pending for the last index
            if step is None:
                yield 's[%d]' % i
            else:
                yield _genslice(start, i, step)

        # Probe string made of the characters for 0 .. len(example_sig) - 1,
        # so the code point of every output character is the input index it
        # came from.
        test_string = ''.join(map(compat_chr, range(len(example_sig))))
        cache_res = func(test_string)
        cache_spec = [ord(c) for c in cache_res]
        expr_code = ' + '.join(gen_sig_code(cache_spec))
        signature_id_tuple = '(%s)' % (
            ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
        code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
                '    return %s\n') % (signature_id_tuple, expr_code)
        self.to_screen('Extracted signature function:\n' + code)
1163
1164     def _parse_sig_js(self, jscode):
1165         funcname = self._search_regex(
1166             (r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1167              r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\('),
1168             jscode, 'Initial JS player signature function name', group='sig')
1169
1170         jsi = JSInterpreter(jscode)
1171         initial_function = jsi.extract_function(funcname)
1172         return lambda s: initial_function([s])
1173
1174     def _parse_sig_swf(self, file_contents):
1175         swfi = SWFInterpreter(file_contents)
1176         TARGET_CLASSNAME = 'SignatureDecipher'
1177         searched_class = swfi.extract_class(TARGET_CLASSNAME)
1178         initial_function = swfi.extract_function(searched_class, 'decipher')
1179         return lambda s: initial_function([s])
1180
1181     def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
1182         """Turn the encrypted s field into a working signature"""
1183
1184         if player_url is None:
1185             raise ExtractorError('Cannot decrypt signature without player_url')
1186
1187         if player_url.startswith('//'):
1188             player_url = 'https:' + player_url
1189         elif not re.match(r'https?://', player_url):
1190             player_url = compat_urlparse.urljoin(
1191                 'https://www.youtube.com', player_url)
1192         try:
1193             player_id = (player_url, self._signature_cache_id(s))
1194             if player_id not in self._player_cache:
1195                 func = self._extract_signature_function(
1196                     video_id, player_url, s
1197                 )
1198                 self._player_cache[player_id] = func
1199             func = self._player_cache[player_id]
1200             if self._downloader.params.get('youtube_print_sig_code'):
1201                 self._print_sig_code(func, s)
1202             return func(s)
1203         except Exception as e:
1204             tb = traceback.format_exc()
1205             raise ExtractorError(
1206                 'Signature extraction failed: ' + tb, cause=e)
1207
1208     def _get_subtitles(self, video_id, webpage):
1209         try:
1210             subs_doc = self._download_xml(
1211                 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
1212                 video_id, note=False)
1213         except ExtractorError as err:
1214             self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err))
1215             return {}
1216
1217         sub_lang_list = {}
1218         for track in subs_doc.findall('track'):
1219             lang = track.attrib['lang_code']
1220             if lang in sub_lang_list:
1221                 continue
1222             sub_formats = []
1223             for ext in self._SUBTITLE_FORMATS:
1224                 params = compat_urllib_parse_urlencode({
1225                     'lang': lang,
1226                     'v': video_id,
1227                     'fmt': ext,
1228                     'name': track.attrib['name'].encode('utf-8'),
1229                 })
1230                 sub_formats.append({
1231                     'url': 'https://www.youtube.com/api/timedtext?' + params,
1232                     'ext': ext,
1233                 })
1234             sub_lang_list[lang] = sub_formats
1235         if not sub_lang_list:
1236             self._downloader.report_warning('video doesn\'t have subtitles')
1237             return {}
1238         return sub_lang_list
1239
1240     def _get_ytplayer_config(self, video_id, webpage):
1241         patterns = (
1242             # User data may contain arbitrary character sequences that may affect
1243             # JSON extraction with regex, e.g. when '};' is contained the second
1244             # regex won't capture the whole JSON. Yet working around by trying more
1245             # concrete regex first keeping in mind proper quoted string handling
1246             # to be implemented in future that will replace this workaround (see
1247             # https://github.com/rg3/youtube-dl/issues/7468,
1248             # https://github.com/rg3/youtube-dl/pull/7599)
1249             r';ytplayer\.config\s*=\s*({.+?});ytplayer',
1250             r';ytplayer\.config\s*=\s*({.+?});',
1251         )
1252         config = self._search_regex(
1253             patterns, webpage, 'ytplayer.config', default=None)
1254         if config:
1255             return self._parse_json(
1256                 uppercase_escape(config), video_id, fatal=False)
1257
1258     def _get_automatic_captions(self, video_id, webpage):
1259         """We need the webpage for getting the captions url, pass it as an
1260            argument to speed up the process."""
1261         self.to_screen('%s: Looking for automatic captions' % video_id)
1262         player_config = self._get_ytplayer_config(video_id, webpage)
1263         err_msg = 'Couldn\'t find automatic captions for %s' % video_id
1264         if not player_config:
1265             self._downloader.report_warning(err_msg)
1266             return {}
1267         try:
1268             args = player_config['args']
1269             caption_url = args.get('ttsurl')
1270             if caption_url:
1271                 timestamp = args['timestamp']
1272                 # We get the available subtitles
1273                 list_params = compat_urllib_parse_urlencode({
1274                     'type': 'list',
1275                     'tlangs': 1,
1276                     'asrs': 1,
1277                 })
1278                 list_url = caption_url + '&' + list_params
1279                 caption_list = self._download_xml(list_url, video_id)
1280                 original_lang_node = caption_list.find('track')
1281                 if original_lang_node is None:
1282                     self._downloader.report_warning('Video doesn\'t have automatic captions')
1283                     return {}
1284                 original_lang = original_lang_node.attrib['lang_code']
1285                 caption_kind = original_lang_node.attrib.get('kind', '')
1286
1287                 sub_lang_list = {}
1288                 for lang_node in caption_list.findall('target'):
1289                     sub_lang = lang_node.attrib['lang_code']
1290                     sub_formats = []
1291                     for ext in self._SUBTITLE_FORMATS:
1292                         params = compat_urllib_parse_urlencode({
1293                             'lang': original_lang,
1294                             'tlang': sub_lang,
1295                             'fmt': ext,
1296                             'ts': timestamp,
1297                             'kind': caption_kind,
1298                         })
1299                         sub_formats.append({
1300                             'url': caption_url + '&' + params,
1301                             'ext': ext,
1302                         })
1303                     sub_lang_list[sub_lang] = sub_formats
1304                 return sub_lang_list
1305
1306             def make_captions(sub_url, sub_langs):
1307                 parsed_sub_url = compat_urllib_parse_urlparse(sub_url)
1308                 caption_qs = compat_parse_qs(parsed_sub_url.query)
1309                 captions = {}
1310                 for sub_lang in sub_langs:
1311                     sub_formats = []
1312                     for ext in self._SUBTITLE_FORMATS:
1313                         caption_qs.update({
1314                             'tlang': [sub_lang],
1315                             'fmt': [ext],
1316                         })
1317                         sub_url = compat_urlparse.urlunparse(parsed_sub_url._replace(
1318                             query=compat_urllib_parse_urlencode(caption_qs, True)))
1319                         sub_formats.append({
1320                             'url': sub_url,
1321                             'ext': ext,
1322                         })
1323                     captions[sub_lang] = sub_formats
1324                 return captions
1325
1326             # New captions format as of 22.06.2017
1327             player_response = args.get('player_response')
1328             if player_response and isinstance(player_response, compat_str):
1329                 player_response = self._parse_json(
1330                     player_response, video_id, fatal=False)
1331                 if player_response:
1332                     renderer = player_response['captions']['playerCaptionsTracklistRenderer']
1333                     base_url = renderer['captionTracks'][0]['baseUrl']
1334                     sub_lang_list = []
1335                     for lang in renderer['translationLanguages']:
1336                         lang_code = lang.get('languageCode')
1337                         if lang_code:
1338                             sub_lang_list.append(lang_code)
1339                     return make_captions(base_url, sub_lang_list)
1340
1341             # Some videos don't provide ttsurl but rather caption_tracks and
1342             # caption_translation_languages (e.g. 20LmZk1hakA)
1343             # Does not used anymore as of 22.06.2017
1344             caption_tracks = args['caption_tracks']
1345             caption_translation_languages = args['caption_translation_languages']
1346             caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
1347             sub_lang_list = []
1348             for lang in caption_translation_languages.split(','):
1349                 lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang))
1350                 sub_lang = lang_qs.get('lc', [None])[0]
1351                 if sub_lang:
1352                     sub_lang_list.append(sub_lang)
1353             return make_captions(caption_url, sub_lang_list)
1354         # An extractor error can be raise by the download process if there are
1355         # no automatic captions but there are subtitles
1356         except (KeyError, IndexError, ExtractorError):
1357             self._downloader.report_warning(err_msg)
1358             return {}
1359
1360     def _mark_watched(self, video_id, video_info):
1361         playback_url = video_info.get('videostats_playback_base_url', [None])[0]
1362         if not playback_url:
1363             return
1364         parsed_playback_url = compat_urlparse.urlparse(playback_url)
1365         qs = compat_urlparse.parse_qs(parsed_playback_url.query)
1366
1367         # cpn generation algorithm is reverse engineered from base.js.
1368         # In fact it works even with dummy cpn.
1369         CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
1370         cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
1371
1372         qs.update({
1373             'ver': ['2'],
1374             'cpn': [cpn],
1375         })
1376         playback_url = compat_urlparse.urlunparse(
1377             parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
1378
1379         self._download_webpage(
1380             playback_url, video_id, 'Marking watched',
1381             'Unable to mark watched', fatal=False)
1382
1383     @staticmethod
1384     def _extract_urls(webpage):
1385         # Embedded YouTube player
1386         entries = [
1387             unescapeHTML(mobj.group('url'))
1388             for mobj in re.finditer(r'''(?x)
1389             (?:
1390                 <iframe[^>]+?src=|
1391                 data-video-url=|
1392                 <embed[^>]+?src=|
1393                 embedSWF\(?:\s*|
1394                 <object[^>]+data=|
1395                 new\s+SWFObject\(
1396             )
1397             (["\'])
1398                 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1399                 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
1400             \1''', webpage)]
1401
1402         # lazyYT YouTube embed
1403         entries.extend(list(map(
1404             unescapeHTML,
1405             re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))
1406
1407         # Wordpress "YouTube Video Importer" plugin
1408         matches = re.findall(r'''(?x)<div[^>]+
1409             class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
1410             data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
1411         entries.extend(m[-1] for m in matches)
1412
1413         return entries
1414
1415     @staticmethod
1416     def _extract_url(webpage):
1417         urls = YoutubeIE._extract_urls(webpage)
1418         return urls[0] if urls else None
1419
1420     @classmethod
1421     def extract_id(cls, url):
1422         mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
1423         if mobj is None:
1424             raise ExtractorError('Invalid URL: %s' % url)
1425         video_id = mobj.group(2)
1426         return video_id
1427
1428     def _extract_annotations(self, video_id):
1429         url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id
1430         return self._download_webpage(url, video_id, note='Searching for annotations.', errnote='Unable to download video annotations.')
1431
1432     @staticmethod
1433     def _extract_chapters(description, duration):
1434         if not description:
1435             return None
1436         chapter_lines = re.findall(
1437             r'(?:^|<br\s*/>)([^<]*<a[^>]+onclick=["\']yt\.www\.watch\.player\.seekTo[^>]+>(\d{1,2}:\d{1,2}(?::\d{1,2})?)</a>[^>]*)(?=$|<br\s*/>)',
1438             description)
1439         if not chapter_lines:
1440             return None
1441         chapters = []
1442         for next_num, (chapter_line, time_point) in enumerate(
1443                 chapter_lines, start=1):
1444             start_time = parse_duration(time_point)
1445             if start_time is None:
1446                 continue
1447             if start_time > duration:
1448                 break
1449             end_time = (duration if next_num == len(chapter_lines)
1450                         else parse_duration(chapter_lines[next_num][1]))
1451             if end_time is None:
1452                 continue
1453             if end_time > duration:
1454                 end_time = duration
1455             if start_time > end_time:
1456                 break
1457             chapter_title = re.sub(
1458                 r'<a[^>]+>[^<]+</a>', '', chapter_line).strip(' \t-')
1459             chapter_title = re.sub(r'\s+', ' ', chapter_title)
1460             chapters.append({
1461                 'start_time': start_time,
1462                 'end_time': end_time,
1463                 'title': chapter_title,
1464             })
1465         return chapters
1466
1467     def _real_extract(self, url):
1468         url, smuggled_data = unsmuggle_url(url, {})
1469
1470         proto = (
1471             'http' if self._downloader.params.get('prefer_insecure', False)
1472             else 'https')
1473
1474         start_time = None
1475         end_time = None
1476         parsed_url = compat_urllib_parse_urlparse(url)
1477         for component in [parsed_url.fragment, parsed_url.query]:
1478             query = compat_parse_qs(component)
1479             if start_time is None and 't' in query:
1480                 start_time = parse_duration(query['t'][0])
1481             if start_time is None and 'start' in query:
1482                 start_time = parse_duration(query['start'][0])
1483             if end_time is None and 'end' in query:
1484                 end_time = parse_duration(query['end'][0])
1485
1486         # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1487         mobj = re.search(self._NEXT_URL_RE, url)
1488         if mobj:
1489             url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
1490         video_id = self.extract_id(url)
1491
1492         # Get video webpage
1493         url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
1494         video_webpage = self._download_webpage(url, video_id)
1495
1496         # Attempt to extract SWF player URL
1497         mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
1498         if mobj is not None:
1499             player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1500         else:
1501             player_url = None
1502
1503         dash_mpds = []
1504
1505         def add_dash_mpd(video_info):
1506             dash_mpd = video_info.get('dashmpd')
1507             if dash_mpd and dash_mpd[0] not in dash_mpds:
1508                 dash_mpds.append(dash_mpd[0])
1509
1510         is_live = None
1511         view_count = None
1512
1513         def extract_view_count(v_info):
1514             return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
1515
1516         # Get video info
1517         embed_webpage = None
1518         if re.search(r'player-age-gate-content">', video_webpage) is not None:
1519             age_gate = True
1520             # We simulate the access to the video from www.youtube.com/v/{video_id}
1521             # this can be viewed without login into Youtube
1522             url = proto + '://www.youtube.com/embed/%s' % video_id
1523             embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
1524             data = compat_urllib_parse_urlencode({
1525                 'video_id': video_id,
1526                 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1527                 'sts': self._search_regex(
1528                     r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
1529             })
1530             video_info_url = proto + '://www.youtube.com/get_video_info?' + data
1531             video_info_webpage = self._download_webpage(
1532                 video_info_url, video_id,
1533                 note='Refetching age-gated info webpage',
1534                 errnote='unable to download video info webpage')
1535             video_info = compat_parse_qs(video_info_webpage)
1536             add_dash_mpd(video_info)
1537         else:
1538             age_gate = False
1539             video_info = None
1540             sts = None
1541             # Try looking directly into the video webpage
1542             ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
1543             if ytplayer_config:
1544                 args = ytplayer_config['args']
1545                 if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
1546                     # Convert to the same format returned by compat_parse_qs
1547                     video_info = dict((k, [v]) for k, v in args.items())
1548                     add_dash_mpd(video_info)
1549                 # Rental video is not rented but preview is available (e.g.
1550                 # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
1551                 # https://github.com/rg3/youtube-dl/issues/10532)
1552                 if not video_info and args.get('ypc_vid'):
1553                     return self.url_result(
1554                         args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
1555                 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1556                     is_live = True
1557                 sts = ytplayer_config.get('sts')
1558             if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
1559                 # We also try looking in get_video_info since it may contain different dashmpd
1560                 # URL that points to a DASH manifest with possibly different itag set (some itags
1561                 # are missing from DASH manifest pointed by webpage's dashmpd, some - from DASH
1562                 # manifest pointed by get_video_info's dashmpd).
1563                 # The general idea is to take a union of itags of both DASH manifests (for example
1564                 # video with such 'manifest behavior' see https://github.com/rg3/youtube-dl/issues/6093)
1565                 self.report_video_info_webpage_download(video_id)
1566                 for el in ('info', 'embedded', 'detailpage', 'vevo', ''):
1567                     query = {
1568                         'video_id': video_id,
1569                         'ps': 'default',
1570                         'eurl': '',
1571                         'gl': 'US',
1572                         'hl': 'en',
1573                     }
1574                     if el:
1575                         query['el'] = el
1576                     if sts:
1577                         query['sts'] = sts
1578                     video_info_webpage = self._download_webpage(
1579                         '%s://www.youtube.com/get_video_info' % proto,
1580                         video_id, note=False,
1581                         errnote='unable to download video info webpage',
1582                         fatal=False, query=query)
1583                     if not video_info_webpage:
1584                         continue
1585                     get_video_info = compat_parse_qs(video_info_webpage)
1586                     add_dash_mpd(get_video_info)
1587                     if view_count is None:
1588                         view_count = extract_view_count(get_video_info)
1589                     if not video_info:
1590                         video_info = get_video_info
1591                     if 'token' in get_video_info:
1592                         # Different get_video_info requests may report different results, e.g.
1593                         # some may report video unavailability, but some may serve it without
1594                         # any complaint (see https://github.com/rg3/youtube-dl/issues/7362,
1595                         # the original webpage as well as el=info and el=embedded get_video_info
1596                         # requests report video unavailability due to geo restriction while
1597                         # el=detailpage succeeds and returns valid data). This is probably
1598                         # due to YouTube measures against IP ranges of hosting providers.
1599                         # Working around by preferring the first succeeded video_info containing
1600                         # the token if no such video_info yet was found.
1601                         if 'token' not in video_info:
1602                             video_info = get_video_info
1603                         break
1604
1605         def extract_unavailable_message():
1606             return self._html_search_regex(
1607                 r'(?s)<h1[^>]+id="unavailable-message"[^>]*>(.+?)</h1>',
1608                 video_webpage, 'unavailable message', default=None)
1609
1610         if 'token' not in video_info:
1611             if 'reason' in video_info:
1612                 if 'The uploader has not made this video available in your country.' in video_info['reason']:
1613                     regions_allowed = self._html_search_meta(
1614                         'regionsAllowed', video_webpage, default=None)
1615                     countries = regions_allowed.split(',') if regions_allowed else None
1616                     self.raise_geo_restricted(
1617                         msg=video_info['reason'][0], countries=countries)
1618                 reason = video_info['reason'][0]
1619                 if 'Invalid parameters' in reason:
1620                     unavailable_message = extract_unavailable_message()
1621                     if unavailable_message:
1622                         reason = unavailable_message
1623                 raise ExtractorError(
1624                     'YouTube said: %s' % reason,
1625                     expected=True, video_id=video_id)
1626             else:
1627                 raise ExtractorError(
1628                     '"token" parameter not in video info for unknown reason',
1629                     video_id=video_id)
1630
1631         # title
1632         if 'title' in video_info:
1633             video_title = video_info['title'][0]
1634         else:
1635             self._downloader.report_warning('Unable to extract video title')
1636             video_title = '_'
1637
1638         # description
1639         description_original = video_description = get_element_by_id("eow-description", video_webpage)
1640         if video_description:
1641
1642             def replace_url(m):
1643                 redir_url = compat_urlparse.urljoin(url, m.group(1))
1644                 parsed_redir_url = compat_urllib_parse_urlparse(redir_url)
1645                 if re.search(r'^(?:www\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)$', parsed_redir_url.netloc) and parsed_redir_url.path == '/redirect':
1646                     qs = compat_parse_qs(parsed_redir_url.query)
1647                     q = qs.get('q')
1648                     if q and q[0]:
1649                         return q[0]
1650                 return redir_url
1651
1652             description_original = video_description = re.sub(r'''(?x)
1653                 <a\s+
1654                     (?:[a-zA-Z-]+="[^"]*"\s+)*?
1655                     (?:title|href)="([^"]+)"\s+
1656                     (?:[a-zA-Z-]+="[^"]*"\s+)*?
1657                     class="[^"]*"[^>]*>
1658                 [^<]+\.{3}\s*
1659                 </a>
1660             ''', replace_url, video_description)
1661             video_description = clean_html(video_description)
1662         else:
1663             fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
1664             if fd_mobj:
1665                 video_description = unescapeHTML(fd_mobj.group(1))
1666             else:
1667                 video_description = ''
1668
1669         if 'multifeed_metadata_list' in video_info and not smuggled_data.get('force_singlefeed', False):
1670             if not self._downloader.params.get('noplaylist'):
1671                 entries = []
1672                 feed_ids = []
1673                 multifeed_metadata_list = video_info['multifeed_metadata_list'][0]
1674                 for feed in multifeed_metadata_list.split(','):
1675                     # Unquote should take place before split on comma (,) since textual
1676                     # fields may contain comma as well (see
1677                     # https://github.com/rg3/youtube-dl/issues/8536)
1678                     feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
1679                     entries.append({
1680                         '_type': 'url_transparent',
1681                         'ie_key': 'Youtube',
1682                         'url': smuggle_url(
1683                             '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
1684                             {'force_singlefeed': True}),
1685                         'title': '%s (%s)' % (video_title, feed_data['title'][0]),
1686                     })
1687                     feed_ids.append(feed_data['id'][0])
1688                 self.to_screen(
1689                     'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
1690                     % (', '.join(feed_ids), video_id))
1691                 return self.playlist_result(entries, video_id, video_title, video_description)
1692             self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
1693
1694         if view_count is None:
1695             view_count = extract_view_count(video_info)
1696
1697         # Check for "rental" videos
1698         if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
1699             raise ExtractorError('"rental" videos not supported. See https://github.com/rg3/youtube-dl/issues/359 for more information.', expected=True)
1700
1701         # Start extracting information
1702         self.report_information_extraction(video_id)
1703
1704         # uploader
1705         video_uploader = try_get(video_info, lambda x: x['author'][0], compat_str)
1706         if video_uploader:
1707             video_uploader = compat_urllib_parse_unquote_plus(video_uploader)
1708         else:
1709             self._downloader.report_warning('unable to extract uploader name')
1710
1711         # uploader_id
1712         video_uploader_id = None
1713         video_uploader_url = None
1714         mobj = re.search(
1715             r'<link itemprop="url" href="(?P<uploader_url>https?://www\.youtube\.com/(?:user|channel)/(?P<uploader_id>[^"]+))">',
1716             video_webpage)
1717         if mobj is not None:
1718             video_uploader_id = mobj.group('uploader_id')
1719             video_uploader_url = mobj.group('uploader_url')
1720         else:
1721             self._downloader.report_warning('unable to extract uploader nickname')
1722
1723         # thumbnail image
1724         # We try first to get a high quality image:
1725         m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
1726                             video_webpage, re.DOTALL)
1727         if m_thumb is not None:
1728             video_thumbnail = m_thumb.group(1)
1729         elif 'thumbnail_url' not in video_info:
1730             self._downloader.report_warning('unable to extract video thumbnail')
1731             video_thumbnail = None
1732         else:   # don't panic if we can't find it
1733             video_thumbnail = compat_urllib_parse_unquote_plus(video_info['thumbnail_url'][0])
1734
1735         # upload date
1736         upload_date = self._html_search_meta(
1737             'datePublished', video_webpage, 'upload date', default=None)
1738         if not upload_date:
1739             upload_date = self._search_regex(
1740                 [r'(?s)id="eow-date.*?>(.*?)</span>',
1741                  r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'],
1742                 video_webpage, 'upload date', default=None)
1743         upload_date = unified_strdate(upload_date)
1744
1745         video_license = self._html_search_regex(
1746             r'<h4[^>]+class="title"[^>]*>\s*License\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li',
1747             video_webpage, 'license', default=None)
1748
1749         m_music = re.search(
1750             r'''(?x)
1751                 <h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*
1752                 <ul[^>]*>\s*
1753                 <li>(?P<title>.+?)
1754                 by (?P<creator>.+?)
1755                 (?:
1756                     \(.+?\)|
1757                     <a[^>]*
1758                         (?:
1759                             \bhref=["\']/red[^>]*>|             # drop possible
1760                             >\s*Listen ad-free with YouTube Red # YouTube Red ad
1761                         )
1762                     .*?
1763                 )?</li
1764             ''',
1765             video_webpage)
1766         if m_music:
1767             video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
1768             video_creator = clean_html(m_music.group('creator'))
1769         else:
1770             video_alt_title = video_creator = None
1771
1772         def extract_meta(field):
1773             return self._html_search_regex(
1774                 r'<h4[^>]+class="title"[^>]*>\s*%s\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li>\s*' % field,
1775                 video_webpage, field, default=None)
1776
1777         track = extract_meta('Song')
1778         artist = extract_meta('Artist')
1779
1780         m_episode = re.search(
1781             r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
1782             video_webpage)
1783         if m_episode:
1784             series = m_episode.group('series')
1785             season_number = int(m_episode.group('season'))
1786             episode_number = int(m_episode.group('episode'))
1787         else:
1788             series = season_number = episode_number = None
1789
1790         m_cat_container = self._search_regex(
1791             r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
1792             video_webpage, 'categories', default=None)
1793         if m_cat_container:
1794             category = self._html_search_regex(
1795                 r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
1796                 default=None)
1797             video_categories = None if category is None else [category]
1798         else:
1799             video_categories = None
1800
1801         video_tags = [
1802             unescapeHTML(m.group('content'))
1803             for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
1804
1805         def _extract_count(count_name):
1806             return str_to_int(self._search_regex(
1807                 r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>'
1808                 % re.escape(count_name),
1809                 video_webpage, count_name, default=None))
1810
1811         like_count = _extract_count('like')
1812         dislike_count = _extract_count('dislike')
1813
1814         # subtitles
1815         video_subtitles = self.extract_subtitles(video_id, video_webpage)
1816         automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
1817
1818         video_duration = try_get(
1819             video_info, lambda x: int_or_none(x['length_seconds'][0]))
1820         if not video_duration:
1821             video_duration = parse_duration(self._html_search_meta(
1822                 'duration', video_webpage, 'video duration'))
1823
1824         # annotations
1825         video_annotations = None
1826         if self._downloader.params.get('writeannotations', False):
1827             video_annotations = self._extract_annotations(video_id)
1828
1829         chapters = self._extract_chapters(description_original, video_duration)
1830
1831         def _extract_filesize(media_url):
1832             return int_or_none(self._search_regex(
1833                 r'\bclen[=/](\d+)', media_url, 'filesize', default=None))
1834
1835         if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1836             self.report_rtmp_download()
1837             formats = [{
1838                 'format_id': '_rtmp',
1839                 'protocol': 'rtmp',
1840                 'url': video_info['conn'][0],
1841                 'player_url': player_url,
1842             }]
1843         elif not is_live and (len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1):
1844             encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
1845             if 'rtmpe%3Dyes' in encoded_url_map:
1846                 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
1847             formats_spec = {}
1848             fmt_list = video_info.get('fmt_list', [''])[0]
1849             if fmt_list:
1850                 for fmt in fmt_list.split(','):
1851                     spec = fmt.split('/')
1852                     if len(spec) > 1:
1853                         width_height = spec[1].split('x')
1854                         if len(width_height) == 2:
1855                             formats_spec[spec[0]] = {
1856                                 'resolution': spec[1],
1857                                 'width': int_or_none(width_height[0]),
1858                                 'height': int_or_none(width_height[1]),
1859                             }
1860             q = qualities(['small', 'medium', 'hd720'])
1861             formats = []
1862             for url_data_str in encoded_url_map.split(','):
1863                 url_data = compat_parse_qs(url_data_str)
1864                 if 'itag' not in url_data or 'url' not in url_data:
1865                     continue
1866                 format_id = url_data['itag'][0]
1867                 url = url_data['url'][0]
1868
1869                 if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
1870                     ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
1871                     jsplayer_url_json = self._search_regex(
1872                         ASSETS_RE,
1873                         embed_webpage if age_gate else video_webpage,
1874                         'JS player URL (1)', default=None)
1875                     if not jsplayer_url_json and not age_gate:
1876                         # We need the embed website after all
1877                         if embed_webpage is None:
1878                             embed_url = proto + '://www.youtube.com/embed/%s' % video_id
1879                             embed_webpage = self._download_webpage(
1880                                 embed_url, video_id, 'Downloading embed webpage')
1881                         jsplayer_url_json = self._search_regex(
1882                             ASSETS_RE, embed_webpage, 'JS player URL')
1883
1884                     player_url = json.loads(jsplayer_url_json)
1885                     if player_url is None:
1886                         player_url_json = self._search_regex(
1887                             r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
1888                             video_webpage, 'age gate player URL')
1889                         player_url = json.loads(player_url_json)
1890
1891                 if 'sig' in url_data:
1892                     url += '&signature=' + url_data['sig'][0]
1893                 elif 's' in url_data:
1894                     encrypted_sig = url_data['s'][0]
1895
1896                     if self._downloader.params.get('verbose'):
1897                         if player_url is None:
1898                             player_version = 'unknown'
1899                             player_desc = 'unknown'
1900                         else:
1901                             if player_url.endswith('swf'):
1902                                 player_version = self._search_regex(
1903                                     r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
1904                                     'flash player', fatal=False)
1905                                 player_desc = 'flash player %s' % player_version
1906                             else:
1907                                 player_version = self._search_regex(
1908                                     [r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js',
1909                                      r'(?:www|player)-([^/]+)(?:/[a-z]{2}_[A-Z]{2})?/base\.js'],
1910                                     player_url,
1911                                     'html5 player', fatal=False)
1912                                 player_desc = 'html5 player %s' % player_version
1913
1914                         parts_sizes = self._signature_cache_id(encrypted_sig)
1915                         self.to_screen('{%s} signature length %s, %s' %
1916                                        (format_id, parts_sizes, player_desc))
1917
1918                     signature = self._decrypt_signature(
1919                         encrypted_sig, video_id, player_url, age_gate)
1920                     url += '&signature=' + signature
1921                 if 'ratebypass' not in url:
1922                     url += '&ratebypass=yes'
1923
1924                 dct = {
1925                     'format_id': format_id,
1926                     'url': url,
1927                     'player_url': player_url,
1928                 }
1929                 if format_id in self._formats:
1930                     dct.update(self._formats[format_id])
1931                 if format_id in formats_spec:
1932                     dct.update(formats_spec[format_id])
1933
1934                 # Some itags are not included in DASH manifest thus corresponding formats will
1935                 # lack metadata (see https://github.com/rg3/youtube-dl/pull/5993).
1936                 # Trying to extract metadata from url_encoded_fmt_stream_map entry.
1937                 mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
1938                 width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
1939
1940                 filesize = int_or_none(url_data.get(
1941                     'clen', [None])[0]) or _extract_filesize(url)
1942
1943                 quality = url_data.get('quality_label', [None])[0] or url_data.get('quality', [None])[0]
1944
1945                 more_fields = {
1946                     'filesize': filesize,
1947                     'tbr': float_or_none(url_data.get('bitrate', [None])[0], 1000),
1948                     'width': width,
1949                     'height': height,
1950                     'fps': int_or_none(url_data.get('fps', [None])[0]),
1951                     'format_note': quality,
1952                     'quality': q(quality),
1953                 }
1954                 for key, value in more_fields.items():
1955                     if value:
1956                         dct[key] = value
1957                 type_ = url_data.get('type', [None])[0]
1958                 if type_:
1959                     type_split = type_.split(';')
1960                     kind_ext = type_split[0].split('/')
1961                     if len(kind_ext) == 2:
1962                         kind, _ = kind_ext
1963                         dct['ext'] = mimetype2ext(type_split[0])
1964                         if kind in ('audio', 'video'):
1965                             codecs = None
1966                             for mobj in re.finditer(
1967                                     r'(?P<key>[a-zA-Z_-]+)=(?P<quote>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):
1968                                 if mobj.group('key') == 'codecs':
1969                                     codecs = mobj.group('val')
1970                                     break
1971                             if codecs:
1972                                 dct.update(parse_codecs(codecs))
1973                 if dct.get('acodec') == 'none' or dct.get('vcodec') == 'none':
1974                     dct['downloader_options'] = {
1975                         # Youtube throttles chunks >~10M
1976                         'http_chunk_size': 10485760,
1977                     }
1978                 formats.append(dct)
1979         elif video_info.get('hlsvp'):
1980             manifest_url = video_info['hlsvp'][0]
1981             formats = []
1982             m3u8_formats = self._extract_m3u8_formats(
1983                 manifest_url, video_id, 'mp4', fatal=False)
1984             for a_format in m3u8_formats:
1985                 itag = self._search_regex(
1986                     r'/itag/(\d+)/', a_format['url'], 'itag', default=None)
1987                 if itag:
1988                     a_format['format_id'] = itag
1989                     if itag in self._formats:
1990                         dct = self._formats[itag].copy()
1991                         dct.update(a_format)
1992                         a_format = dct
1993                 a_format['player_url'] = player_url
1994                 # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
1995                 a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
1996                 formats.append(a_format)
1997         else:
1998             error_message = clean_html(video_info.get('reason', [None])[0])
1999             if not error_message:
2000                 error_message = extract_unavailable_message()
2001             if error_message:
2002                 raise ExtractorError(error_message, expected=True)
2003             raise ExtractorError('no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
2004
2005         # Look for the DASH manifest
2006         if self._downloader.params.get('youtube_include_dash_manifest', True):
2007             dash_mpd_fatal = True
2008             for mpd_url in dash_mpds:
2009                 dash_formats = {}
2010                 try:
2011                     def decrypt_sig(mobj):
2012                         s = mobj.group(1)
2013                         dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
2014                         return '/signature/%s' % dec_s
2015
2016                     mpd_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, mpd_url)
2017
2018                     for df in self._extract_mpd_formats(
2019                             mpd_url, video_id, fatal=dash_mpd_fatal,
2020                             formats_dict=self._formats):
2021                         if not df.get('filesize'):
2022                             df['filesize'] = _extract_filesize(df['url'])
2023                         # Do not overwrite DASH format found in some previous DASH manifest
2024                         if df['format_id'] not in dash_formats:
2025                             dash_formats[df['format_id']] = df
2026                         # Additional DASH manifests may end up in HTTP Error 403 therefore
2027                         # allow them to fail without bug report message if we already have
2028                         # some DASH manifest succeeded. This is temporary workaround to reduce
2029                         # burst of bug reports until we figure out the reason and whether it
2030                         # can be fixed at all.
2031                         dash_mpd_fatal = False
2032                 except (ExtractorError, KeyError) as e:
2033                     self.report_warning(
2034                         'Skipping DASH manifest: %r' % e, video_id)
2035                 if dash_formats:
2036                     # Remove the formats we found through non-DASH, they
2037                     # contain less info and it can be wrong, because we use
2038                     # fixed values (for example the resolution). See
2039                     # https://github.com/rg3/youtube-dl/issues/5774 for an
2040                     # example.
2041                     formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
2042                     formats.extend(dash_formats.values())
2043
2044         # Check for malformed aspect ratio
2045         stretched_m = re.search(
2046             r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
2047             video_webpage)
2048         if stretched_m:
2049             w = float(stretched_m.group('w'))
2050             h = float(stretched_m.group('h'))
2051             # yt:stretch may hold invalid ratio data (e.g. for Q39EVAstoRM ratio is 17:0).
2052             # We will only process correct ratios.
2053             if w > 0 and h > 0:
2054                 ratio = w / h
2055                 for f in formats:
2056                     if f.get('vcodec') != 'none':
2057                         f['stretched_ratio'] = ratio
2058
2059         self._sort_formats(formats)
2060
2061         self.mark_watched(video_id, video_info)
2062
2063         return {
2064             'id': video_id,
2065             'uploader': video_uploader,
2066             'uploader_id': video_uploader_id,
2067             'uploader_url': video_uploader_url,
2068             'upload_date': upload_date,
2069             'license': video_license,
2070             'creator': video_creator or artist,
2071             'title': video_title,
2072             'alt_title': video_alt_title or track,
2073             'thumbnail': video_thumbnail,
2074             'description': video_description,
2075             'categories': video_categories,
2076             'tags': video_tags,
2077             'subtitles': video_subtitles,
2078             'automatic_captions': automatic_captions,
2079             'duration': video_duration,
2080             'age_limit': 18 if age_gate else 0,
2081             'annotations': video_annotations,
2082             'chapters': chapters,
2083             'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
2084             'view_count': view_count,
2085             'like_count': like_count,
2086             'dislike_count': dislike_count,
2087             'average_rating': float_or_none(video_info.get('avg_rating', [None])[0]),
2088             'formats': formats,
2089             'is_live': is_live,
2090             'start_time': start_time,
2091             'end_time': end_time,
2092             'series': series,
2093             'season_number': season_number,
2094             'episode_number': episode_number,
2095             'track': track,
2096             'artist': artist,
2097         }
2098
2099
class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
    """Extractor for YouTube playlists, mixes and bare playlist ids."""

    IE_DESC = 'YouTube.com playlists'
    # Group 1 captures the playlist id found in a URL, group 2 a bare playlist id.
    _VALID_URL = r"""(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            youtube\.com/
                            (?:
                               (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/(?:videoseries|[0-9A-Za-z_-]{11}))
                               \? (?:.*?[&;])*? (?:p|a|list)=
                            |  p/
                            )|
                            youtu\.be/[0-9A-Za-z_-]{11}\?.*?\blist=
                        )
                        (
                            (?:PL|LL|EC|UU|FL|RD|UL|TL)?[0-9A-Za-z-_]{10,}
                            # Top tracks, they can also include dots
                            |(?:MC)[\w\.]*
                        )
                        .*
                     |
                        (%(playlist_id)s)
                     )""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&amp;[^"]*?index=(?P<index>\d+)(?:[^>]+>(?P<title>[^<]+))?'
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
        'info_dict': {
            'title': 'ytdl test PL',
            'id': 'PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
        },
        'playlist_count': 3,
    }, {
        'url': 'https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
        'info_dict': {
            'id': 'PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
            'title': 'YDL_Empty_List',
        },
        'playlist_count': 0,
        'skip': 'This playlist is private',
    }, {
        'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
        'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
        'info_dict': {
            'title': '29C3: Not my department',
            'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
        },
        'playlist_count': 95,
    }, {
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
        },
        'playlist_mincount': 26,
    }, {
        'note': 'Large playlist',
        'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
        'info_dict': {
            'title': 'Uploads from Cauchemar',
            'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
        },
        'playlist_mincount': 799,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 485,
        'info_dict': {
            'title': '2017 華語最新單曲 (2/24更新)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        }
    }, {
        'note': 'Embedded SWF player',
        'url': 'https://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA7',
            'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ',
        }
    }, {
        'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
        'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
        'info_dict': {
            'title': 'Uploads from Interstellar Movie',
            'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
        },
        'playlist_mincount': 21,
    }, {
        # Playlist URL that does not actually serve a playlist
        'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
        'info_dict': {
            'id': 'FqZTN594JQw',
            'ext': 'webm',
            'title': "Smiley's People 01 detective, Adventure Series, Action",
            'uploader': 'STREEM',
            'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
            'upload_date': '20150526',
            'license': 'Standard YouTube License',
            'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
            'categories': ['People & Blogs'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'skip_download': True,
        },
        'add_ie': [YoutubeIE.ie_key()],
    }, {
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'license': 'Standard YouTube License',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }]

    def _real_initialize(self):
        """Run the login routine before extraction starts."""
        self._login()

    def _extract_mix(self, playlist_id):
        """Build a playlist result for a mix.

        Watch pages are requested repeatedly, each time seeded with the last
        video id collected so far, until a page contributes no new video ids.
        The title is read from the last page that was downloaded.
        """
        # The mixes are generated from a single video
        # the id of the playlist is just 'RD' + video_id
        collected_ids = []
        seed_id = playlist_id[-11:]
        page_num = 0
        while True:
            page_num += 1
            mix_url = 'https://youtube.com/watch?v=%s&list=%s' % (seed_id, playlist_id)
            webpage = self._download_webpage(
                mix_url, playlist_id,
                'Downloading page {0} of Youtube mix'.format(page_num))
            page_ids = orderedSet(re.findall(
                r'''(?xs)data-video-username=".*?".*?
                           href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id),
                webpage))
            # Fetch new pages until all the videos are repeated, it seems that
            # there are always 51 unique videos.
            fresh_ids = [vid for vid in page_ids if vid not in collected_ids]
            if not fresh_ids:
                break
            collected_ids.extend(fresh_ids)
            seed_id = collected_ids[-1]

        url_results = self._ids_to_results(collected_ids)

        # Try the known title containers in order and keep the first non-empty one.
        title_span = None
        for class_name in ('playlist-title', 'title long-title', 'title'):
            title_span = get_element_by_attribute('class', class_name, webpage)
            if title_span:
                break

        return self.playlist_result(url_results, playlist_id, clean_html(title_span))

    def _extract_playlist(self, playlist_id):
        """Download the playlist page and return ``(has_videos, playlist)``.

        ``has_videos`` is False only when the page has no playlist title and
        yields no entries. Raises ExtractorError when an alert on the page
        says the playlist does not exist, is private, or that the parameters
        are invalid.
        """
        url = self._TEMPLATE_URL % playlist_id
        page = self._download_webpage(url, playlist_id)

        # the yt-alert-message now has tabindex attribute (see https://github.com/rg3/youtube-dl/issues/11604)
        for alert in re.findall(r'<div class="yt-alert-message"[^>]*>([^<]+)</div>', page):
            alert = alert.strip()
            # Check if the playlist exists or is private
            unavailable = re.match(
                r'[^<]*(?:The|This) playlist (?P<reason>does not exist|is private)[^<]*', alert)
            if unavailable:
                reason = unavailable.group('reason')
                message = 'This playlist %s' % reason
                if 'private' in reason:
                    message += ', use --username or --netrc to access it'
                raise ExtractorError(message + '.', expected=True)
            if re.match(r'[^<]*Invalid parameters[^<]*', alert):
                raise ExtractorError(
                    'Invalid parameters. Maybe URL is incorrect.',
                    expected=True)
            if re.match(r'[^<]*Choose your language[^<]*', alert):
                continue
            self.report_warning('Youtube gives an alert message: ' + alert)

        playlist_title = self._html_search_regex(
            r'(?s)<h1 class="pl-header-title[^"]*"[^>]*>\s*(.*?)\s*</h1>',
            page, 'title', default=None)

        _UPLOADER_BASE = r'class=["\']pl-header-details[^>]+>\s*<li>\s*<a[^>]+\bhref='
        uploader = self._search_regex(
            r'%s["\']/(?:user|channel)/[^>]+>([^<]+)' % _UPLOADER_BASE,
            page, 'uploader', default=None)

        uploader_id = uploader_url = None
        uploader_match = re.search(
            r'%s(["\'])(?P<path>/(?:user|channel)/(?P<uploader_id>.+?))\1' % _UPLOADER_BASE,
            page)
        if uploader_match:
            uploader_id = uploader_match.group('uploader_id')
            uploader_url = compat_urlparse.urljoin(url, uploader_match.group('path'))

        # With a title the page is trusted to be a playlist; otherwise probe
        # for at least one entry.
        has_videos = bool(playlist_title)
        if not has_videos:
            try:
                # Some playlist URLs don't actually serve a playlist (e.g.
                # https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4)
                next(self._entries(page, playlist_id))
            except StopIteration:
                pass
            else:
                has_videos = True

        playlist = self.playlist_result(
            self._entries(page, playlist_id), playlist_id, playlist_title)
        playlist.update(
            uploader=uploader,
            uploader_id=uploader_id,
            uploader_url=uploader_url)

        return has_videos, playlist

    def _check_download_just_video(self, url, playlist_id):
        """Return ``(video_id, result)`` for a URL that may also name a video.

        ``video_id`` is None when the URL carries no video id. ``result`` is a
        url_result for that video only when the 'noplaylist' param is set,
        otherwise None.
        """
        # Check if it's a video-specific URL
        query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
        video_id = query.get('v', [None])[0]
        if not video_id:
            video_id = self._search_regex(
                r'(?:(?:^|//)youtu\.be/|youtube\.com/embed/(?!videoseries))([0-9A-Za-z_-]{11})', url,
                'video id', default=None)
        if not video_id:
            return None, None

        if self._downloader.params.get('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
            return video_id, self.url_result(video_id, 'Youtube', video_id=video_id)

        self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
        return video_id, None

    def _real_extract(self, url):
        """Dispatch to single-video, mix or regular playlist extraction."""
        # Extract playlist id
        url_match = re.match(self._VALID_URL, url)
        if url_match is None:
            raise ExtractorError('Invalid URL: %s' % url)
        playlist_id = url_match.group(1) or url_match.group(2)

        video_id, video = self._check_download_just_video(url, playlist_id)
        if video:
            return video

        mix_prefixes = ('RD', 'UL', 'PU')
        if playlist_id.startswith(mix_prefixes):
            # Mixes require a custom extraction process
            return self._extract_mix(playlist_id)

        has_videos, playlist = self._extract_playlist(playlist_id)
        if video_id and not has_videos:
            # Some playlist URLs don't actually serve a playlist (see
            # https://github.com/rg3/youtube-dl/issues/10537).
            # Fallback to plain video extraction if there is a video id
            # along with playlist id.
            return self.url_result(video_id, 'Youtube', video_id=video_id)

        return playlist
2391
2392
class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
    """Extractor for the videos of a YouTube channel URL."""

    IE_DESC = 'YouTube.com channels'
    _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/(?P<id>[0-9A-Za-z_-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos'
    _VIDEO_RE = r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?'
    IE_NAME = 'youtube:channel'
    _TESTS = [{
        'note': 'paginated channel',
        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
        'playlist_mincount': 91,
        'info_dict': {
            'id': 'UUKfVa3S1e4PHvxWcwyMMg8w',
            'title': 'Uploads from lex will',
        }
    }, {
        'note': 'Age restricted channel',
        # from https://www.youtube.com/user/DeusExOfficial
        'url': 'https://www.youtube.com/channel/UCs0ifCMCm1icqRbqhUINa0w',
        'playlist_mincount': 64,
        'info_dict': {
            'id': 'UUs0ifCMCm1icqRbqhUINa0w',
            'title': 'Uploads from Deus Ex',
        },
    }]

    @classmethod
    def suitable(cls, url):
        """Decline URLs that the playlists or live extractors accept."""
        if YoutubePlaylistsIE.suitable(url) or YoutubeLiveIE.suitable(url):
            return False
        return super(YoutubeChannelIE, cls).suitable(url)

    def _build_template_url(self, url, channel_id):
        """Return the channel videos page URL for ``channel_id``."""
        return self._TEMPLATE_URL % channel_id

    def _real_extract(self, url):
        """Extract a channel, preferring its uploads playlist when it can be found.

        Falls back to listing videos from the channel page itself when no
        'UC'-prefixed channel id can be obtained.
        """
        channel_id = self._match_id(url)

        url = self._build_template_url(url, channel_id)

        # Channel by page listing is restricted to 35 pages of 30 items, i.e. 1050 videos total (see #5778)
        # Workaround by extracting as a playlist if managed to obtain channel playlist URL
        # otherwise fallback on channel by page extraction
        channel_page = self._download_webpage(
            url + '?view=57', channel_id,
            'Downloading channel page', fatal=False)

        channel_playlist_id = False
        if channel_page is not False:
            channel_playlist_id = self._html_search_meta(
                'channelId', channel_page, 'channel id', default=None)
            if not channel_playlist_id:
                # No channelId meta tag: look for the id inside an app deep link.
                app_url = self._html_search_meta(
                    ('al:ios:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad'),
                    channel_page, 'channel url', default=None)
                if app_url:
                    channel_playlist_id = self._search_regex(
                        r'vnd\.youtube://user/([0-9A-Za-z_-]+)',
                        app_url, 'channel id', default=None)

        if channel_playlist_id and channel_playlist_id.startswith('UC'):
            # Swap the 'UC' prefix for 'UU' and delegate to the playlist extractor.
            uploads_id = 'UU' + channel_playlist_id[2:]
            uploads_url = compat_urlparse.urljoin(url, '/playlist?list=%s' % uploads_id)
            return self.url_result(uploads_url, 'YoutubePlaylist')

        channel_page = self._download_webpage(url, channel_id, 'Downloading page #1')
        autogenerated = bool(re.search(r'''(?x)
                class="[^"]*?(?:
                    channel-header-autogenerated-label|
                    yt-channel-title-autogenerated
                )[^"]*"''', channel_page))

        if autogenerated:
            # The videos are contained in a single page
            # the ajax pages can't be used, they are empty
            entries = []
            for video_id, video_title in self.extract_videos_from_page(channel_page):
                entries.append(self.url_result(
                    video_id, 'Youtube', video_id=video_id,
                    video_title=video_title))
            return self.playlist_result(entries, channel_id)

        try:
            next(self._entries(channel_page, channel_id))
        except StopIteration:
            # No entries at all: surface any alert shown on the page.
            alert_message = self._html_search_regex(
                r'(?s)<div[^>]+class=(["\']).*?\byt-alert-message\b.*?\1[^>]*>(?P<alert>[^<]+)</div>',
                channel_page, 'alert', default=None, group='alert')
            if alert_message:
                raise ExtractorError('Youtube said: %s' % alert_message, expected=True)

        return self.playlist_result(self._entries(channel_page, channel_id), channel_id)
2482
2483
class YoutubeUserIE(YoutubeChannelIE):
    """Extractor for user and custom-URL pages, and the "ytuser:" keyword."""

    IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:(?P<user>user|c)/)?(?!(?:attribution_link|watch|results|shared)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/%s/%s/videos'
    IE_NAME = 'youtube:user'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheLinuxFoundation',
        'playlist_mincount': 320,
        'info_dict': {
            'id': 'UUfX55Sx5hEFjoC3cNs6mCUQ',
            'title': 'Uploads from The Linux Foundation',
        }
    }, {
        # Only available via https://www.youtube.com/c/12minuteathlete/videos
        # but not https://www.youtube.com/user/12minuteathlete/videos
        'url': 'https://www.youtube.com/c/12minuteathlete/videos',
        'playlist_mincount': 249,
        'info_dict': {
            'id': 'UUVjM-zV6_opMDx7WYxnjZiQ',
            'title': 'Uploads from 12 Minute Athlete',
        }
    }, {
        'url': 'ytuser:phihag',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/gametrailers',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/gametrailers',
        'only_matching': True,
    }, {
        # This channel is not available, geo restricted to JP
        'url': 'https://www.youtube.com/user/kananishinoSMEJ/videos',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Accept a URL only when no other Youtube*IE class in this module does."""
        # This extractor's regex is very permissive and would also match URLs
        # that belong to the other YouTube extractors, so give every other
        # module-level Youtube*IE class the first chance to claim the URL.
        for name, klass in globals().items():
            if not (name.startswith('Youtube') and name.endswith('IE')):
                continue
            if klass is cls:
                continue
            if klass.suitable(url):
                return False
        return super(YoutubeUserIE, cls).suitable(url)

    def _build_template_url(self, url, channel_id):
        """Return the videos page URL, keeping the 'user' or 'c' path kind of ``url``."""
        url_match = re.match(self._VALID_URL, url)
        # URLs without an explicit kind (and "ytuser:" keywords) default to 'user'.
        path_kind = url_match.group('user') or 'user'
        return self._TEMPLATE_URL % (path_kind, url_match.group('id'))
2534
2535
class YoutubeLiveIE(YoutubeBaseInfoExtractor):
    # Resolves a ".../live" URL either to the video found on that page or to
    # the URL with the "/live" suffix stripped.
    IE_NAME = 'youtube:live'
    IE_DESC = 'YouTube.com live streams'
    _VALID_URL = r'(?P<base_url>https?://(?:\w+\.)?youtube\.com/(?:(?:user|channel|c)/)?(?P<id>[^/]+))/live'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheYoungTurks/live',
        'info_dict': {
            'id': 'a48o2S1cPoo',
            'ext': 'mp4',
            'title': 'The Young Turks - Live Main Show',
            'uploader': 'The Young Turks',
            'uploader_id': 'TheYoungTurks',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
            'upload_date': '20150715',
            'license': 'Standard YouTube License',
            'description': 'md5:438179573adcdff3c97ebb1ee632b891',
            'categories': ['News & Politics'],
            'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/TheYoungTurks/live',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to the video named on the live page, else to the base URL."""
        match = re.match(self._VALID_URL, url)
        channel_id, base_url = match.group('id', 'base_url')

        # The download is non-fatal: without a page we fall back to base_url.
        webpage = self._download_webpage(url, channel_id, fatal=False)
        if not webpage:
            return self.url_result(base_url)

        page_type = self._og_search_property(
            'type', webpage, 'page type', default='')
        video_id = self._html_search_meta(
            'videoId', webpage, 'video id', default=None)

        # Only trust the page when it is a video page carrying a well-formed
        # 11-character video id.
        is_video_page = page_type.startswith('video')
        if is_video_page and video_id and re.match(
                r'^[0-9A-Za-z_-]{11}$', video_id):
            return self.url_result(video_id, YoutubeIE.ie_key())

        return self.url_result(base_url)
2586
2587
class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
    # Matches the "playlists" tab of a user or channel page; all extraction
    # logic comes from the base class.
    IE_NAME = 'youtube:playlists'
    IE_DESC = 'YouTube.com user/channel playlists'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/(?:user|channel)/(?P<id>[^/]+)/playlists'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
        'playlist_mincount': 4,
        'info_dict': {
            'id': 'ThirstForScience',
            'title': 'Thirst for Science',
        },
    }, {
        # with "Load more" button
        'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
        'playlist_mincount': 70,
        'info_dict': {
            'id': 'igorkle1',
            'title': 'Игорь Клейнер',
        },
    }, {
        'url': 'https://www.youtube.com/channel/UCiU1dHvZObB2iP6xkJ__Icw/playlists',
        'playlist_mincount': 17,
        'info_dict': {
            'id': 'UCiU1dHvZObB2iP6xkJ__Icw',
            'title': 'Chem Player',
        },
    }]
2616
2617
class YoutubeSearchBaseInfoExtractor(YoutubePlaylistBaseInfoExtractor):
    # Matches an href pointing at /watch?v=<11-character id> ("id" group) and,
    # when a title attribute follows inside the same tag, captures its value
    # as the optional "title" group.
    # NOTE(review): presumably consumed by the page-processing code inherited
    # from YoutubePlaylistBaseInfoExtractor - confirm against that class.
    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})(?:[^"]*"[^>]+\btitle="(?P<title>[^"]+))?'
2620
2621
class YoutubeSearchIE(SearchInfoExtractor, YoutubeSearchBaseInfoExtractor):
    IE_DESC = 'YouTube.com searches'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    # Extra query-string parameters merged into the search URL; subclasses
    # override this to change e.g. the sort order.
    _EXTRA_QUERY_ARGS = {}
    _TESTS = []

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query

        Result pages are downloaded one after another until a page yields no
        videos, no "Next" link is found, or at least n videos were collected.
        Returns a playlist result holding at most n entries.
        Raises ExtractorError when YouTube reports no results.
        """

        videos = []

        url_query = {
            'search_query': query.encode('utf-8'),
        }
        url_query.update(self._EXTRA_QUERY_ARGS)
        result_url = 'https://www.youtube.com/results?' + compat_urllib_parse_urlencode(url_query)

        for pagenum in itertools.count(1):
            data = self._download_json(
                result_url, video_id='query "%s"' % query,
                note='Downloading page %s' % pagenum,
                errnote='Unable to download API page',
                query={'spf': 'navigate'})
            html_content = data[1]['body']['content']

            if 'class="search-message' in html_content:
                raise ExtractorError(
                    '[youtube] No video results', expected=True)

            new_videos = list(self._process_page(html_content))
            videos += new_videos
            # Stop as soon as enough videos are collected; ">=" avoids
            # downloading one more page whose results would be discarded.
            if not new_videos or len(videos) >= n:
                break
            next_link = self._html_search_regex(
                r'href="(/results\?[^"]*\bsp=[^"]+)"[^>]*>\s*<span[^>]+class="[^"]*\byt-uix-button-content\b[^"]*"[^>]*>Next',
                html_content, 'next link', default=None)
            if next_link is None:
                break
            result_url = compat_urlparse.urljoin('https://www.youtube.com/', next_link)

        # n may be float('inf') (see _MAX_RESULTS), which cannot be used as a
        # slice bound, so only slice when there really is an excess.
        if len(videos) > n:
            videos = videos[:n]
        return self.playlist_result(videos, query)
2670
2671
class YoutubeSearchDateIE(YoutubeSearchIE):
    # Same search as YoutubeSearchIE, with an extra query argument that
    # requests sorting by upload date.
    _SEARCH_KEY = 'ytsearchdate'
    _EXTRA_QUERY_ARGS = {'search_sort': 'video_date_uploaded'}
    IE_NAME = '%s:date' % YoutubeSearchIE.IE_NAME
    IE_DESC = 'YouTube.com searches, newest videos first'
2677
2678
class YoutubeSearchURLIE(YoutubeSearchBaseInfoExtractor):
    # Handles a pasted search-results URL (as opposed to a "ytsearch" key).
    IE_NAME = 'youtube:search_url'
    IE_DESC = 'YouTube.com search URLs'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the videos of the results page as a playlist titled by the query."""
        raw_query = re.match(self._VALID_URL, url).group('query')
        # The decoded query doubles as the download label and playlist title.
        query = compat_urllib_parse_unquote_plus(raw_query)
        results_page = self._download_webpage(url, query)
        entries = self._process_page(results_page)
        return self.playlist_result(entries, playlist_title=query)
2699
2700
class YoutubeShowIE(YoutubePlaylistsBaseInfoExtractor):
    # A show is extracted through its "playlists" page by the base class.
    IE_NAME = 'youtube:show'
    IE_DESC = 'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/show/(?P<id>[^?#]*)'
    _TESTS = [{
        'url': 'https://www.youtube.com/show/airdisasters',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'airdisasters',
            'title': 'Air Disasters',
        }
    }]

    def _real_extract(self, url):
        """Rewrite the show URL to its playlists page and defer to the base class."""
        show_id = self._match_id(url)
        playlists_url = 'https://www.youtube.com/show/%s/playlists' % show_id
        return super(YoutubeShowIE, self)._real_extract(playlists_url)
2718
2719
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
    """
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        """Extractor name derived from the subclass's _FEED_NAME."""
        return 'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        self._login()

    def _entries(self, page):
        """Yield a result for every distinct video found in the feed.

        page is the HTML of the first feed page; follow-up portions are
        fetched through the "load more" link until a portion contributes no
        new video ids or no such link is present.
        """
        # The extraction process is the same as for playlists, but the regex
        # for the video ids doesn't contain an index
        seen_ids = set()  # a set keeps the duplicate check O(1) per id
        more_widget_html = content_html = page
        for page_num in itertools.count(1):
            matches = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)

            # 'recommended' feed has infinite 'load more' and each new portion spins
            # the same videos in (sometimes) slightly different order, so we'll check
            # for unicity and break when portion has no new videos
            new_ids = [
                video_id for video_id in orderedSet(matches)
                if video_id not in seen_ids]
            if not new_ids:
                break

            seen_ids.update(new_ids)

            for entry in self._ids_to_results(new_ids):
                yield entry

            mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
            if not mobj:
                break

            more = self._download_json(
                'https://youtube.com/%s' % mobj.group('more'), self._PLAYLIST_TITLE,
                'Downloading page #%s' % page_num,
                transform_source=uppercase_escape)
            content_html = more['content_html']
            more_widget_html = more['load_more_widget_html']

    def _real_extract(self, url):
        """Download the feed page and return its videos as a playlist."""
        page = self._download_webpage(
            'https://www.youtube.com/feed/%s' % self._FEED_NAME,
            self._PLAYLIST_TITLE)
        return self.playlist_result(
            self._entries(page), playlist_title=self._PLAYLIST_TITLE)
2771
2772
class YoutubeWatchLaterIE(YoutubePlaylistIE):
    # Extractor for the "WL" playlist, reachable through several URL shapes
    # or the ":ytwatchlater" shortcut.
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    IE_NAME = 'youtube:watchlater'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:feed/watch_later|(?:playlist|watch)\?(?:.+&)?list=WL)|:ytwatchlater'

    _TESTS = [{
        'url': 'https://www.youtube.com/playlist?list=WL',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?v=bCNU9TrbiRk&index=1&list=WL',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the single video when one is selected, else the WL playlist."""
        _ignored, single_video = self._check_download_just_video(url, 'WL')
        if single_video:
            return single_video
        # Only the second element of the returned pair is the playlist result.
        _ignored, watch_later = self._extract_playlist('WL')
        return watch_later
2792
2793
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    # Resolves the logged-in user's favourites page to its playlist.
    _LOGIN_REQUIRED = True
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/my_favorites|:ytfav(?:ou?rites)?'

    def _real_extract(self, url):
        """Find the favourites playlist id and delegate to the playlist extractor."""
        favourites_page = self._download_webpage(
            'https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        # The first "list=" parameter found on the page names the playlist.
        playlist_id = self._search_regex(
            r'list=(.+?)["&]', favourites_page, 'favourites playlist id')
        return self.url_result(playlist_id, 'YoutubePlaylist')
2804
2805
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    # Feed extractor for /feed/recommended (shortcut ":ytrec").
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = 'Youtube Recommended videos'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/recommended|:ytrec(?:ommended)?'
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
2811
2812
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    # Feed extractor for /feed/subscriptions (shortcut ":ytsubs").
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = 'Youtube Subscriptions'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
    IE_DESC = 'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
2818
2819
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    # Feed extractor for /feed/history (shortcut ":ythistory").
    _FEED_NAME = 'history'
    _PLAYLIST_TITLE = 'Youtube History'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/history|:ythistory'
    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
2825
2826
class YoutubeTruncatedURLIE(InfoExtractor):
    # Catches watch/attribution URLs that carry no video id and turns them
    # into a helpful error instead of an extraction attempt.
    IE_DESC = False  # Do not list
    IE_NAME = 'youtube:truncated_url'
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError explaining the likely cause."""
        hint = (
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like  youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply  youtube-dl BaW_jenozKc  .')
        raise ExtractorError(hint, expected=True)
2874
2875
class YoutubeTruncatedIDIE(InfoExtractor):
    # Catches watch URLs whose video id is shorter than 11 characters.
    IE_DESC = False  # Do not list
    IE_NAME = 'youtube:truncated_id'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the incomplete id."""
        truncated_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (truncated_id, url)
        raise ExtractorError(message, expected=True)