_ Git - youtube-dl/blob - youtube_dl/extractor/youtube.py

   1 # coding: utf-8
   2
   3 from __future__ import unicode_literals
   4
   5
   6 import itertools
   7 import json
   8 import os.path
   9 import random
  10 import re
  11 import time
  12 import traceback
  13
  14 from .common import InfoExtractor, SearchInfoExtractor
  15 from ..jsinterp import JSInterpreter
  16 from ..swfinterp import SWFInterpreter
  17 from ..compat import (
  18     compat_chr,
  19     compat_parse_qs,
  20     compat_urllib_parse_unquote,
  21     compat_urllib_parse_unquote_plus,
  22     compat_urllib_parse_urlencode,
  23     compat_urllib_parse_urlparse,
  24     compat_urlparse,
  25     compat_str,
  26 )
  27 from ..utils import (
  28     clean_html,
  29     error_to_compat_str,
  30     ExtractorError,
  31     float_or_none,
  32     get_element_by_attribute,
  33     get_element_by_id,
  34     int_or_none,
  35     mimetype2ext,
  36     orderedSet,
  37     parse_duration,
  38     remove_quotes,
  39     remove_start,
  40     sanitized_Request,
  41     smuggle_url,
  42     str_to_int,
  43     unescapeHTML,
  44     unified_strdate,
  45     unsmuggle_url,
  46     uppercase_escape,
  47     urlencode_postdata,
  48     ISO3166Utils,
  49 )
  50
  51
  52 class YoutubeBaseInfoExtractor(InfoExtractor):
  53     """Provide base functions for Youtube extractors"""
  54     _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
  55     _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'
  56     _PASSWORD_CHALLENGE_URL = 'https://accounts.google.com/signin/challenge/sl/password'
  57     _NETRC_MACHINE = 'youtube'
  58     # If True it will raise an error if no login info is provided
  59     _LOGIN_REQUIRED = False
  60
  61     def _set_language(self):
  62         self._set_cookie(
  63             '.youtube.com', 'PREF', 'f1=50000000&hl=en',
  64             # YouTube sets the expire time to about two months
  65             expire_time=time.time() + 2 * 30 * 24 * 3600)
  66
  67     def _ids_to_results(self, ids):
  68         return [
  69             self.url_result(vid_id, 'Youtube', video_id=vid_id)
  70             for vid_id in ids]
  71
  72     def _login(self):
  73         """
  74         Attempt to log in to YouTube.
  75         True is returned if successful or skipped.
  76         False is returned if login failed.
  77
  78         If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
  79         """
  80         (username, password) = self._get_login_info()
  81         # No authentication to be performed
  82         if username is None:
  83             if self._LOGIN_REQUIRED:
  84                 raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
  85             return True
  86
  87         login_page = self._download_webpage(
  88             self._LOGIN_URL, None,
  89             note='Downloading login page',
  90             errnote='unable to fetch login page', fatal=False)
  91         if login_page is False:
  92             return
  93
  94         login_form = self._hidden_inputs(login_page)
  95
  96         login_form.update({
  97             'Email': username,
  98             'Passwd': password,
  99         })
 100
 101         login_results = self._download_webpage(
 102             self._PASSWORD_CHALLENGE_URL, None,
 103             note='Logging in', errnote='unable to log in', fatal=False,
 104             data=urlencode_postdata(login_form))
 105         if login_results is False:
 106             return False
 107
 108         error_msg = self._html_search_regex(
 109             r'<[^>]+id="errormsg_0_Passwd"[^>]*>([^<]+)<',
 110             login_results, 'error message', default=None)
 111         if error_msg:
 112             raise ExtractorError('Unable to login: %s' % error_msg, expected=True)
 113
 114         if re.search(r'id="errormsg_0_Passwd"', login_results) is not None:
 115             raise ExtractorError('Please use your account password and a two-factor code instead of an application-specific password.', expected=True)
 116
 117         # Two-Factor
 118         # TODO add SMS and phone call support - these require making a request and then prompting the user
 119
 120         if re.search(r'(?i)<form[^>]+id="challenge"', login_results) is not None:
 121             tfa_code = self._get_tfa_info('2-step verification code')
 122
 123             if not tfa_code:
 124                 self._downloader.report_warning(
 125                     'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
 126                     '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
 127                 return False
 128
 129             tfa_code = remove_start(tfa_code, 'G-')
 130
 131             tfa_form_strs = self._form_hidden_inputs('challenge', login_results)
 132
 133             tfa_form_strs.update({
 134                 'Pin': tfa_code,
 135                 'TrustDevice': 'on',
 136             })
 137
 138             tfa_data = urlencode_postdata(tfa_form_strs)
 139
 140             tfa_req = sanitized_Request(self._TWOFACTOR_URL, tfa_data)
 141             tfa_results = self._download_webpage(
 142                 tfa_req, None,
 143                 note='Submitting TFA code', errnote='unable to submit tfa', fatal=False)
 144
 145             if tfa_results is False:
 146                 return False
 147
 148             if re.search(r'(?i)<form[^>]+id="challenge"', tfa_results) is not None:
 149                 self._downloader.report_warning('Two-factor code expired or invalid. Please try again, or use a one-use backup code instead.')
 150                 return False
 151             if re.search(r'(?i)<form[^>]+id="gaia_loginform"', tfa_results) is not None:
 152                 self._downloader.report_warning('unable to log in - did the page structure change?')
 153                 return False
 154             if re.search(r'smsauth-interstitial-reviewsettings', tfa_results) is not None:
 155                 self._downloader.report_warning('Your Google account has a security notice. Please log in on your web browser, resolve the notice, and try again.')
 156                 return False
 157
 158         if re.search(r'(?i)<form[^>]+id="gaia_loginform"', login_results) is not None:
 159             self._downloader.report_warning('unable to log in: bad username or password')
 160             return False
 161         return True
 162
 163     def _real_initialize(self):
 164         if self._downloader is None:
 165             return
 166         self._set_language()
 167         if not self._login():
 168             return
 169
 170
 171 class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
 172     # Extract entries from page with "Load more" button
 173     def _entries(self, page, playlist_id):
 174         more_widget_html = content_html = page
 175         for page_num in itertools.count(1):
 176             for entry in self._process_page(content_html):
 177                 yield entry
 178
 179             mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
 180             if not mobj:
 181                 break
 182
 183             more = self._download_json(
 184                 'https://youtube.com/%s' % mobj.group('more'), playlist_id,
 185                 'Downloading page #%s' % page_num,
 186                 transform_source=uppercase_escape)
 187             content_html = more['content_html']
 188             if not content_html.strip():
 189                 # Some webpages show a "Load more" button but they don't
 190                 # have more videos
 191                 break
 192             more_widget_html = more['load_more_widget_html']
 193
 194
 195 class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
 196     def _process_page(self, content):
 197         for video_id, video_title in self.extract_videos_from_page(content):
 198             yield self.url_result(video_id, 'Youtube', video_id, video_title)
 199
 200     def extract_videos_from_page(self, page):
 201         ids_in_page = []
 202         titles_in_page = []
 203         for mobj in re.finditer(self._VIDEO_RE, page):
 204             # The link with index 0 is not the first video of the playlist (not sure if still actual)
 205             if 'index' in mobj.groupdict() and mobj.group('id') == '0':
 206                 continue
 207             video_id = mobj.group('id')
 208             video_title = unescapeHTML(mobj.group('title'))
 209             if video_title:
 210                 video_title = video_title.strip()
 211             try:
 212                 idx = ids_in_page.index(video_id)
 213                 if video_title and not titles_in_page[idx]:
 214                     titles_in_page[idx] = video_title
 215             except ValueError:
 216                 ids_in_page.append(video_id)
 217                 titles_in_page.append(video_title)
 218         return zip(ids_in_page, titles_in_page)
 219
 220
 221 class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
 222     def _process_page(self, content):
 223         for playlist_id in orderedSet(re.findall(
 224                 r'<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*><a[^>]+href="/?playlist\?list=([0-9A-Za-z-_]{10,})"',
 225                 content)):
 226             yield self.url_result(
 227                 'https://www.youtube.com/playlist?list=%s' % playlist_id, 'YoutubePlaylist')
 228
 229     def _real_extract(self, url):
 230         playlist_id = self._match_id(url)
 231         webpage = self._download_webpage(url, playlist_id)
 232         title = self._og_search_title(webpage, fatal=False)
 233         return self.playlist_result(self._entries(webpage, playlist_id), playlist_id, title)
 234
 235
 236 class YoutubeIE(YoutubeBaseInfoExtractor):
 237     IE_DESC = 'YouTube.com'
 238     _VALID_URL = r"""(?x)^
 239                      (
 240                          (?:https?://|//)                                    # http(s):// or protocol-independent URL
 241                          (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|
 242                             (?:www\.)?deturl\.com/www\.youtube\.com/|
 243                             (?:www\.)?pwnyoutube\.com/|
 244                             (?:www\.)?yourepeat\.com/|
 245                             tube\.majestyc\.net/|
 246                             youtube\.googleapis\.com/)                        # the various hostnames, with wildcard subdomains
 247                          (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
 248                          (?:                                                  # the various things that can precede the ID:
 249                              (?:(?:v|embed|e)/(?!videoseries))                # v/ or embed/ or e/
 250                              |(?:                                             # or the v= param in all its forms
 251                                  (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)?  # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
 252                                  (?:\?|\#!?)                                  # the params delimiter ? or # or #!
 253                                  (?:.*?[&;])??                                # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
 254                                  v=
 255                              )
 256                          ))
 257                          |(?:
 258                             youtu\.be|                                        # just youtu.be/xxxx
 259                             vid\.plus|                                        # or vid.plus/xxxx
 260                             zwearz\.com/watch|                                # or zwearz.com/watch/xxxx
 261                          )/
 262                          |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
 263                          )
 264                      )?                                                       # all until now is optional -> you can pass the naked ID
 265                      ([0-9A-Za-z_-]{11})                                      # here is it! the YouTube video ID
 266                      (?!.*?&list=)                                            # combined list/video URLs are handled by the playlist IE
 267                      (?(1).+)?                                                # if we found the ID, everything can follow
 268                      $"""
 269     _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
 270     _formats = {
 271         '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
 272         '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
 273         '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
 274         '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
 275         '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
 276         '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
 277         '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 278         '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 279         # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
 280         '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
 281         '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
 282         '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
 283         '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
 284         '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
 285         '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
 286         '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
 287         '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 288         '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 289
 290
 291         # 3D videos
 292         '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
 293         '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
 294         '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
 295         '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
 296         '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
 297         '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
 298         '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
 299
 300         # Apple HTTP Live Streaming
 301         '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
 302         '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
 303         '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
 304         '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
 305         '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
 306         '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
 307         '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
 308         '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
 309
 310         # DASH mp4 video
 311         '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
 312         '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
 313         '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
 314         '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
 315         '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
 316         '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},  # Height can vary (https://github.com/rg3/youtube-dl/issues/4559)
 317         '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
 318         '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
 319         '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60, 'preference': -40},
 320         '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60, 'preference': -40},
 321         '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
 322
 323         # Dash mp4 audio
 324         '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'preference': -50, 'container': 'm4a_dash'},
 325         '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'preference': -50, 'container': 'm4a_dash'},
 326         '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'preference': -50, 'container': 'm4a_dash'},
 327         '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'preference': -50, 'container': 'm4a_dash'},
 328         '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'preference': -50, 'container': 'm4a_dash'},
 329
 330         # Dash webm
 331         '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
 332         '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
 333         '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
 334         '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
 335         '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
 336         '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
 337         '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9', 'preference': -40},
 338         '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
 339         '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
 340         '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
 341         '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
 342         '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
 343         '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
 344         '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
 345         '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
 346         # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
 347         '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
 348         '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60, 'preference': -40},
 349         '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60, 'preference': -40},
 350         '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60, 'preference': -40},
 351         '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
 352         '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60, 'preference': -40},
 353
 354         # Dash webm audio
 355         '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128, 'preference': -50},
 356         '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256, 'preference': -50},
 357
 358         # Dash webm audio with opus inside
 359         '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50, 'preference': -50},
 360         '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70, 'preference': -50},
 361         '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160, 'preference': -50},
 362
 363         # RTMP (unnamed)
 364         '_rtmp': {'protocol': 'rtmp'},
 365     }
 366     _SUBTITLE_FORMATS = ('ttml', 'vtt')
 367
 368     IE_NAME = 'youtube'
 369     _TESTS = [
 370         {
 371             'url': 'http://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
 372             'info_dict': {
 373                 'id': 'BaW_jenozKc',
 374                 'ext': 'mp4',
 375                 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
 376                 'uploader': 'Philipp Hagemeister',
 377                 'uploader_id': 'phihag',
 378                 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/phihag',
 379                 'upload_date': '20121002',
 380                 'license': 'Standard YouTube License',
 381                 'description': 'test chars:  "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
 382                 'categories': ['Science & Technology'],
 383                 'tags': ['youtube-dl'],
 384                 'like_count': int,
 385                 'dislike_count': int,
 386                 'start_time': 1,
 387                 'end_time': 9,
 388             }
 389         },
 390         {
 391             'url': 'http://www.youtube.com/watch?v=UxxajLWwzqY',
 392             'note': 'Test generic use_cipher_signature video (#897)',
 393             'info_dict': {
 394                 'id': 'UxxajLWwzqY',
 395                 'ext': 'mp4',
 396                 'upload_date': '20120506',
 397                 'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
 398                 'alt_title': 'I Love It (feat. Charli XCX)',
 399                 'description': 'md5:f3ceb5ef83a08d95b9d146f973157cc8',
 400                 'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
 401                          'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
 402                          'iconic ep', 'iconic', 'love', 'it'],
 403                 'uploader': 'Icona Pop',
 404                 'uploader_id': 'IconaPop',
 405                 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/IconaPop',
 406                 'license': 'Standard YouTube License',
 407                 'creator': 'Icona Pop',
 408             }
 409         },
 410         {
 411             'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ',
 412             'note': 'Test VEVO video with age protection (#956)',
 413             'info_dict': {
 414                 'id': '07FYdnEawAQ',
 415                 'ext': 'mp4',
 416                 'upload_date': '20130703',
 417                 'title': 'Justin Timberlake - Tunnel Vision (Explicit)',
 418                 'alt_title': 'Tunnel Vision',
 419                 'description': 'md5:64249768eec3bc4276236606ea996373',
 420                 'uploader': 'justintimberlakeVEVO',
 421                 'uploader_id': 'justintimberlakeVEVO',
 422                 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/justintimberlakeVEVO',
 423                 'license': 'Standard YouTube License',
 424                 'creator': 'Justin Timberlake',
 425                 'age_limit': 18,
 426             }
 427         },
 428         {
 429             'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
 430             'note': 'Embed-only video (#1746)',
 431             'info_dict': {
 432                 'id': 'yZIXLfi8CZQ',
 433                 'ext': 'mp4',
 434                 'upload_date': '20120608',
 435                 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
 436                 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
 437                 'uploader': 'SET India',
 438                 'uploader_id': 'setindia',
 439                 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/setindia',
 440                 'license': 'Standard YouTube License',
 441                 'age_limit': 18,
 442             }
 443         },
 444         {
 445             'url': 'http://www.youtube.com/watch?v=BaW_jenozKc&v=UxxajLWwzqY',
 446             'note': 'Use the first video ID in the URL',
 447             'info_dict': {
 448                 'id': 'BaW_jenozKc',
 449                 'ext': 'mp4',
 450                 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
 451                 'uploader': 'Philipp Hagemeister',
 452                 'uploader_id': 'phihag',
 453                 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/phihag',
 454                 'upload_date': '20121002',
 455                 'license': 'Standard YouTube License',
 456                 'description': 'test chars:  "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
 457                 'categories': ['Science & Technology'],
 458                 'tags': ['youtube-dl'],
 459                 'like_count': int,
 460                 'dislike_count': int,
 461             },
 462             'params': {
 463                 'skip_download': True,
 464             },
 465         },
 466         {
 467             'url': 'http://www.youtube.com/watch?v=a9LDPn-MO4I',
 468             'note': '256k DASH audio (format 141) via DASH manifest',
 469             'info_dict': {
 470                 'id': 'a9LDPn-MO4I',
 471                 'ext': 'm4a',
 472                 'upload_date': '20121002',
 473                 'uploader_id': '8KVIDEO',
 474                 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
 475                 'description': '',
 476                 'uploader': '8KVIDEO',
 477                 'license': 'Standard YouTube License',
 478                 'title': 'UHDTV TEST 8K VIDEO.mp4'
 479             },
 480             'params': {
 481                 'youtube_include_dash_manifest': True,
 482                 'format': '141',
 483             },
 484             'skip': 'format 141 not served anymore',
 485         },
 486         # DASH manifest with encrypted signature
 487         {
 488             'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
 489             'info_dict': {
 490                 'id': 'IB3lcPjvWLA',
 491                 'ext': 'm4a',
 492                 'title': 'Afrojack, Spree Wilson - The Spark ft. Spree Wilson',
 493                 'description': 'md5:12e7067fa6735a77bdcbb58cb1187d2d',
 494                 'uploader': 'AfrojackVEVO',
 495                 'uploader_id': 'AfrojackVEVO',
 496                 'upload_date': '20131011',
 497                 'license': 'Standard YouTube License',
 498             },
 499             'params': {
 500                 'youtube_include_dash_manifest': True,
 501                 'format': '141/bestaudio[ext=m4a]',
 502             },
 503         },
 504         # JS player signature function name containing $
 505         {
 506             'url': 'https://www.youtube.com/watch?v=nfWlot6h_JM',
 507             'info_dict': {
 508                 'id': 'nfWlot6h_JM',
 509                 'ext': 'm4a',
 510                 'title': 'Taylor Swift - Shake It Off',
 511                 'alt_title': 'Shake It Off',
 512                 'description': 'md5:95f66187cd7c8b2c13eb78e1223b63c3',
 513                 'uploader': 'TaylorSwiftVEVO',
 514                 'uploader_id': 'TaylorSwiftVEVO',
 515                 'upload_date': '20140818',
 516                 'license': 'Standard YouTube License',
 517                 'creator': 'Taylor Swift',
 518             },
 519             'params': {
 520                 'youtube_include_dash_manifest': True,
 521                 'format': '141/bestaudio[ext=m4a]',
 522             },
 523         },
 524         # Controversy video
 525         {
 526             'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
 527             'info_dict': {
 528                 'id': 'T4XJQO3qol8',
 529                 'ext': 'mp4',
 530                 'upload_date': '20100909',
 531                 'uploader': 'The Amazing Atheist',
 532                 'uploader_id': 'TheAmazingAtheist',
 533                 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
 534                 'license': 'Standard YouTube License',
 535                 'title': 'Burning Everyone\'s Koran',
 536                 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
 537             }
 538         },
 539         # Normal age-gate video (No vevo, embed allowed)
 540         {
 541             'url': 'http://youtube.com/watch?v=HtVdAasjOgU',
 542             'info_dict': {
 543                 'id': 'HtVdAasjOgU',
 544                 'ext': 'mp4',
 545                 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
 546                 'description': 're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
 547                 'uploader': 'The Witcher',
 548                 'uploader_id': 'WitcherGame',
 549                 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
 550                 'upload_date': '20140605',
 551                 'license': 'Standard YouTube License',
 552                 'age_limit': 18,
 553             },
 554         },
 555         # Age-gate video with encrypted signature
 556         {
 557             'url': 'http://www.youtube.com/watch?v=6kLq3WMV1nU',
 558             'info_dict': {
 559                 'id': '6kLq3WMV1nU',
 560                 'ext': 'mp4',
 561                 'title': 'Dedication To My Ex (Miss That) (Lyric Video)',
 562                 'description': 'md5:33765bb339e1b47e7e72b5490139bb41',
 563                 'uploader': 'LloydVEVO',
 564                 'uploader_id': 'LloydVEVO',
 565                 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/LloydVEVO',
 566                 'upload_date': '20110629',
 567                 'license': 'Standard YouTube License',
 568                 'age_limit': 18,
 569             },
 570         },
 571         # video_info is None (https://github.com/rg3/youtube-dl/issues/4421)
 572         {
 573             'url': '__2ABJjxzNo',
 574             'info_dict': {
 575                 'id': '__2ABJjxzNo',
 576                 'ext': 'mp4',
 577                 'upload_date': '20100430',
 578                 'uploader_id': 'deadmau5',
 579                 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/deadmau5',
 580                 'creator': 'deadmau5',
 581                 'description': 'md5:12c56784b8032162bb936a5f76d55360',
 582                 'uploader': 'deadmau5',
 583                 'license': 'Standard YouTube License',
 584                 'title': 'Deadmau5 - Some Chords (HD)',
 585                 'alt_title': 'Some Chords',
 586             },
 587             'expected_warnings': [
 588                 'DASH manifest missing',
 589             ]
 590         },
 591         # Olympics (https://github.com/rg3/youtube-dl/issues/4431)
 592         {
 593             'url': 'lqQg6PlCWgI',
 594             'info_dict': {
 595                 'id': 'lqQg6PlCWgI',
 596                 'ext': 'mp4',
 597                 'upload_date': '20150827',
 598                 'uploader_id': 'olympic',
 599                 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/olympic',
 600                 'license': 'Standard YouTube License',
 601                 'description': 'HO09  - Women -  GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
 602                 'uploader': 'Olympic',
 603                 'title': 'Hockey - Women -  GER-AUS - London 2012 Olympic Games',
 604             },
 605             'params': {
 606                 'skip_download': 'requires avconv',
 607             }
 608         },
 609         # Non-square pixels
 610         {
 611             'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
 612             'info_dict': {
 613                 'id': '_b-2C3KPAM0',
 614                 'ext': 'mp4',
 615                 'stretched_ratio': 16 / 9.,
 616                 'upload_date': '20110310',
 617                 'uploader_id': 'AllenMeow',
 618                 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
 619                 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
 620                 'uploader': '孫艾倫',
 621                 'license': 'Standard YouTube License',
 622                 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
 623             },
 624         },
 625         # url_encoded_fmt_stream_map is empty string
 626         {
 627             'url': 'qEJwOuvDf7I',
 628             'info_dict': {
 629                 'id': 'qEJwOuvDf7I',
 630                 'ext': 'webm',
 631                 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
 632                 'description': '',
 633                 'upload_date': '20150404',
 634                 'uploader_id': 'spbelect',
 635                 'uploader': 'Наблюдатели Петербурга',
 636             },
 637             'params': {
 638                 'skip_download': 'requires avconv',
 639             },
 640             'skip': 'This live event has ended.',
 641         },
 642         # Extraction from multiple DASH manifests (https://github.com/rg3/youtube-dl/pull/6097)
 643         {
 644             'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
 645             'info_dict': {
 646                 'id': 'FIl7x6_3R5Y',
 647                 'ext': 'mp4',
 648                 'title': 'md5:7b81415841e02ecd4313668cde88737a',
 649                 'description': 'md5:116377fd2963b81ec4ce64b542173306',
 650                 'upload_date': '20150625',
 651                 'uploader_id': 'dorappi2000',
 652                 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
 653                 'uploader': 'dorappi2000',
 654                 'license': 'Standard YouTube License',
 655                 'formats': 'mincount:32',
 656             },
 657         },
 658         # DASH manifest with segment_list
 659         {
 660             'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
 661             'md5': '8ce563a1d667b599d21064e982ab9e31',
 662             'info_dict': {
 663                 'id': 'CsmdDsKjzN8',
 664                 'ext': 'mp4',
 665                 'upload_date': '20150501',  # According to '<meta itemprop="datePublished"', but in other places it's 20150510
 666                 'uploader': 'Airtek',
 667                 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
 668                 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
 669                 'license': 'Standard YouTube License',
 670                 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
 671             },
 672             'params': {
 673                 'youtube_include_dash_manifest': True,
 674                 'format': '135',  # bestvideo
 675             },
 676             'skip': 'This live event has ended.',
 677         },
 678         {
 679             # Multifeed videos (multiple cameras), URL is for Main Camera
 680             'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
 681             'info_dict': {
 682                 'id': 'jqWvoWXjCVs',
 683                 'title': 'teamPGP: Rocket League Noob Stream',
 684                 'description': 'md5:dc7872fb300e143831327f1bae3af010',
 685             },
 686             'playlist': [{
 687                 'info_dict': {
 688                     'id': 'jqWvoWXjCVs',
 689                     'ext': 'mp4',
 690                     'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
 691                     'description': 'md5:dc7872fb300e143831327f1bae3af010',
 692                     'upload_date': '20150721',
 693                     'uploader': 'Beer Games Beer',
 694                     'uploader_id': 'beergamesbeer',
 695                     'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
 696                     'license': 'Standard YouTube License',
 697                 },
 698             }, {
 699                 'info_dict': {
 700                     'id': '6h8e8xoXJzg',
 701                     'ext': 'mp4',
 702                     'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
 703                     'description': 'md5:dc7872fb300e143831327f1bae3af010',
 704                     'upload_date': '20150721',
 705                     'uploader': 'Beer Games Beer',
 706                     'uploader_id': 'beergamesbeer',
 707                     'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
 708                     'license': 'Standard YouTube License',
 709                 },
 710             }, {
 711                 'info_dict': {
 712                     'id': 'PUOgX5z9xZw',
 713                     'ext': 'mp4',
 714                     'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
 715                     'description': 'md5:dc7872fb300e143831327f1bae3af010',
 716                     'upload_date': '20150721',
 717                     'uploader': 'Beer Games Beer',
 718                     'uploader_id': 'beergamesbeer',
 719                     'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
 720                     'license': 'Standard YouTube License',
 721                 },
 722             }, {
 723                 'info_dict': {
 724                     'id': 'teuwxikvS5k',
 725                     'ext': 'mp4',
 726                     'title': 'teamPGP: Rocket League Noob Stream (zim)',
 727                     'description': 'md5:dc7872fb300e143831327f1bae3af010',
 728                     'upload_date': '20150721',
 729                     'uploader': 'Beer Games Beer',
 730                     'uploader_id': 'beergamesbeer',
 731                     'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
 732                     'license': 'Standard YouTube License',
 733                 },
 734             }],
 735             'params': {
 736                 'skip_download': True,
 737             },
 738         },
 739         {
 740             # Multifeed video with comma in title (see https://github.com/rg3/youtube-dl/issues/8536)
 741             'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
 742             'info_dict': {
 743                 'id': 'gVfLd0zydlo',
 744                 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
 745             },
 746             'playlist_count': 2,
 747             'skip': 'Not multifeed anymore',
 748         },
 749         {
 750             'url': 'http://vid.plus/FlRa-iH7PGw',
 751             'only_matching': True,
 752         },
 753         {
 754             'url': 'http://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
 755             'only_matching': True,
 756         },
 757         {
 758             # Title with JS-like syntax "};" (see https://github.com/rg3/youtube-dl/issues/7468)
 759             # Also tests cut-off URL expansion in video description (see
 760             # https://github.com/rg3/youtube-dl/issues/1892,
 761             # https://github.com/rg3/youtube-dl/issues/8164)
 762             'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
 763             'info_dict': {
 764                 'id': 'lsguqyKfVQg',
 765                 'ext': 'mp4',
 766                 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
 767                 'alt_title': 'Dark Walk',
 768                 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
 769                 'upload_date': '20151119',
 770                 'uploader_id': 'IronSoulElf',
 771                 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
 772                 'uploader': 'IronSoulElf',
 773                 'license': 'Standard YouTube License',
 774                 'creator': 'Todd Haberman, Daniel Law Heath & Aaron Kaplan',
 775             },
 776             'params': {
 777                 'skip_download': True,
 778             },
 779         },
 780         {
 781             # Tags with '};' (see https://github.com/rg3/youtube-dl/issues/7468)
 782             'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
 783             'only_matching': True,
 784         },
 785         {
 786             # Video with yt:stretch=17:0
 787             'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
 788             'info_dict': {
 789                 'id': 'Q39EVAstoRM',
 790                 'ext': 'mp4',
 791                 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
 792                 'description': 'md5:ee18a25c350637c8faff806845bddee9',
 793                 'upload_date': '20151107',
 794                 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
 795                 'uploader': 'CH GAMER DROID',
 796             },
 797             'params': {
 798                 'skip_download': True,
 799             },
 800             'skip': 'This video does not exist.',
 801         },
 802         {
 803             # Video licensed under Creative Commons
 804             'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
 805             'info_dict': {
 806                 'id': 'M4gD1WSo5mA',
 807                 'ext': 'mp4',
 808                 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
 809                 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
 810                 'upload_date': '20150127',
 811                 'uploader_id': 'BerkmanCenter',
 812                 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
 813                 'uploader': 'BerkmanCenter',
 814                 'license': 'Creative Commons Attribution license (reuse allowed)',
 815             },
 816             'params': {
 817                 'skip_download': True,
 818             },
 819         },
 820         {
 821             # Channel-like uploader_url
 822             'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
 823             'info_dict': {
 824                 'id': 'eQcmzGIKrzg',
 825                 'ext': 'mp4',
 826                 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
 827                 'description': 'md5:dda0d780d5a6e120758d1711d062a867',
 828                 'upload_date': '20151119',
 829                 'uploader': 'Bernie 2016',
 830                 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
 831                 'uploader_url': 're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
 832                 'license': 'Creative Commons Attribution license (reuse allowed)',
 833             },
 834             'params': {
 835                 'skip_download': True,
 836             },
 837         },
 838         {
 839             'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
 840             'only_matching': True,
 841         },
 842         {
 843             # YouTube Red paid video (https://github.com/rg3/youtube-dl/issues/10059)
 844             'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
 845             'only_matching': True,
 846         }
 847     ]
 848
 849     def __init__(self, *args, **kwargs):
 850         super(YoutubeIE, self).__init__(*args, **kwargs)
 851         self._player_cache = {}
 852
 853     def report_video_info_webpage_download(self, video_id):
 854         """Report attempt to download video info webpage."""
 855         self.to_screen('%s: Downloading video info webpage' % video_id)
 856
 857     def report_information_extraction(self, video_id):
 858         """Report attempt to extract video information."""
 859         self.to_screen('%s: Extracting video information' % video_id)
 860
 861     def report_unavailable_format(self, video_id, format):
 862         """Report extracted video URL."""
 863         self.to_screen('%s: Format %s not available' % (video_id, format))
 864
 865     def report_rtmp_download(self):
 866         """Indicate the download will use the RTMP protocol."""
 867         self.to_screen('RTMP download detected')
 868
 869     def _signature_cache_id(self, example_sig):
 870         """ Return a string representation of a signature """
 871         return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
 872
 873     def _extract_signature_function(self, video_id, player_url, example_sig):
 874         id_m = re.match(
 875             r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|/base)?\.(?P<ext>[a-z]+)$',
 876             player_url)
 877         if not id_m:
 878             raise ExtractorError('Cannot identify player %r' % player_url)
 879         player_type = id_m.group('ext')
 880         player_id = id_m.group('id')
 881
 882         # Read from filesystem cache
 883         func_id = '%s_%s_%s' % (
 884             player_type, player_id, self._signature_cache_id(example_sig))
 885         assert os.path.basename(func_id) == func_id
 886
 887         cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
 888         if cache_spec is not None:
 889             return lambda s: ''.join(s[i] for i in cache_spec)
 890
 891         download_note = (
 892             'Downloading player %s' % player_url
 893             if self._downloader.params.get('verbose') else
 894             'Downloading %s player %s' % (player_type, player_id)
 895         )
 896         if player_type == 'js':
 897             code = self._download_webpage(
 898                 player_url, video_id,
 899                 note=download_note,
 900                 errnote='Download of %s failed' % player_url)
 901             res = self._parse_sig_js(code)
 902         elif player_type == 'swf':
 903             urlh = self._request_webpage(
 904                 player_url, video_id,
 905                 note=download_note,
 906                 errnote='Download of %s failed' % player_url)
 907             code = urlh.read()
 908             res = self._parse_sig_swf(code)
 909         else:
 910             assert False, 'Invalid player type %r' % player_type
 911
 912         test_string = ''.join(map(compat_chr, range(len(example_sig))))
 913         cache_res = res(test_string)
 914         cache_spec = [ord(c) for c in cache_res]
 915
 916         self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
 917         return res
 918
 919     def _print_sig_code(self, func, example_sig):
 920         def gen_sig_code(idxs):
 921             def _genslice(start, end, step):
 922                 starts = '' if start == 0 else str(start)
 923                 ends = (':%d' % (end + step)) if end + step >= 0 else ':'
 924                 steps = '' if step == 1 else (':%d' % step)
 925                 return 's[%s%s%s]' % (starts, ends, steps)
 926
 927             step = None
 928             # Quelch pyflakes warnings - start will be set when step is set
 929             start = '(Never used)'
 930             for i, prev in zip(idxs[1:], idxs[:-1]):
 931                 if step is not None:
 932                     if i - prev == step:
 933                         continue
 934                     yield _genslice(start, prev, step)
 935                     step = None
 936                     continue
 937                 if i - prev in [-1, 1]:
 938                     step = i - prev
 939                     start = prev
 940                     continue
 941                 else:
 942                     yield 's[%d]' % prev
 943             if step is None:
 944                 yield 's[%d]' % i
 945             else:
 946                 yield _genslice(start, i, step)
 947
 948         test_string = ''.join(map(compat_chr, range(len(example_sig))))
 949         cache_res = func(test_string)
 950         cache_spec = [ord(c) for c in cache_res]
 951         expr_code = ' + '.join(gen_sig_code(cache_spec))
 952         signature_id_tuple = '(%s)' % (
 953             ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
 954         code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
 955                 '    return %s\n') % (signature_id_tuple, expr_code)
 956         self.to_screen('Extracted signature function:\n' + code)
 957
 958     def _parse_sig_js(self, jscode):
 959         funcname = self._search_regex(
 960             r'\.sig\|\|([a-zA-Z0-9$]+)\(', jscode,
 961             'Initial JS player signature function name')
 962
 963         jsi = JSInterpreter(jscode)
 964         initial_function = jsi.extract_function(funcname)
 965         return lambda s: initial_function([s])
 966
 967     def _parse_sig_swf(self, file_contents):
 968         swfi = SWFInterpreter(file_contents)
 969         TARGET_CLASSNAME = 'SignatureDecipher'
 970         searched_class = swfi.extract_class(TARGET_CLASSNAME)
 971         initial_function = swfi.extract_function(searched_class, 'decipher')
 972         return lambda s: initial_function([s])
 973
 974     def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
 975         """Turn the encrypted s field into a working signature"""
 976
 977         if player_url is None:
 978             raise ExtractorError('Cannot decrypt signature without player_url')
 979
 980         if player_url.startswith('//'):
 981             player_url = 'https:' + player_url
 982         try:
 983             player_id = (player_url, self._signature_cache_id(s))
 984             if player_id not in self._player_cache:
 985                 func = self._extract_signature_function(
 986                     video_id, player_url, s
 987                 )
 988                 self._player_cache[player_id] = func
 989             func = self._player_cache[player_id]
 990             if self._downloader.params.get('youtube_print_sig_code'):
 991                 self._print_sig_code(func, s)
 992             return func(s)
 993         except Exception as e:
 994             tb = traceback.format_exc()
 995             raise ExtractorError(
 996                 'Signature extraction failed: ' + tb, cause=e)
 997
 998     def _get_subtitles(self, video_id, webpage):
 999         try:
1000             subs_doc = self._download_xml(
1001                 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
1002                 video_id, note=False)
1003         except ExtractorError as err:
1004             self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err))
1005             return {}
1006
1007         sub_lang_list = {}
1008         for track in subs_doc.findall('track'):
1009             lang = track.attrib['lang_code']
1010             if lang in sub_lang_list:
1011                 continue
1012             sub_formats = []
1013             for ext in self._SUBTITLE_FORMATS:
1014                 params = compat_urllib_parse_urlencode({
1015                     'lang': lang,
1016                     'v': video_id,
1017                     'fmt': ext,
1018                     'name': track.attrib['name'].encode('utf-8'),
1019                 })
1020                 sub_formats.append({
1021                     'url': 'https://www.youtube.com/api/timedtext?' + params,
1022                     'ext': ext,
1023                 })
1024             sub_lang_list[lang] = sub_formats
1025         if not sub_lang_list:
1026             self._downloader.report_warning('video doesn\'t have subtitles')
1027             return {}
1028         return sub_lang_list
1029
1030     def _get_ytplayer_config(self, video_id, webpage):
1031         patterns = (
1032             # User data may contain arbitrary character sequences that may affect
1033             # JSON extraction with regex, e.g. when '};' is contained the second
1034             # regex won't capture the whole JSON. Yet working around by trying more
1035             # concrete regex first keeping in mind proper quoted string handling
1036             # to be implemented in future that will replace this workaround (see
1037             # https://github.com/rg3/youtube-dl/issues/7468,
1038             # https://github.com/rg3/youtube-dl/pull/7599)
1039             r';ytplayer\.config\s*=\s*({.+?});ytplayer',
1040             r';ytplayer\.config\s*=\s*({.+?});',
1041         )
1042         config = self._search_regex(
1043             patterns, webpage, 'ytplayer.config', default=None)
1044         if config:
1045             return self._parse_json(
1046                 uppercase_escape(config), video_id, fatal=False)
1047
    def _get_automatic_captions(self, video_id, webpage):
        """Build the list of automatic caption tracks of a video.

        The webpage is passed in so that the player config holding the
        captions URL can be read from it instead of being downloaded again.

        Returns a dict mapping each target language code to a list of
        {'url': ..., 'ext': ...} dicts, one per entry of
        self._SUBTITLE_FORMATS.  An empty dict is returned, after a warning,
        when the player config or the captions information cannot be found.
        """
        self.to_screen('%s: Looking for automatic captions' % video_id)
        player_config = self._get_ytplayer_config(video_id, webpage)
        err_msg = 'Couldn\'t find automatic captions for %s' % video_id
        if not player_config:
            self._downloader.report_warning(err_msg)
            return {}
        try:
            args = player_config['args']
            # First variant: the player config provides a 'ttsurl'
            caption_url = args.get('ttsurl')
            if caption_url:
                timestamp = args['timestamp']
                # We get the available subtitles
                list_params = compat_urllib_parse_urlencode({
                    'type': 'list',
                    'tlangs': 1,
                    'asrs': 1,
                })
                list_url = caption_url + '&' + list_params
                caption_list = self._download_xml(list_url, video_id)
                # The first 'track' node supplies the source language and kind
                original_lang_node = caption_list.find('track')
                if original_lang_node is None:
                    self._downloader.report_warning('Video doesn\'t have automatic captions')
                    return {}
                original_lang = original_lang_node.attrib['lang_code']
                caption_kind = original_lang_node.attrib.get('kind', '')

                sub_lang_list = {}
                # One entry per 'target' node, each in every subtitle format
                for lang_node in caption_list.findall('target'):
                    sub_lang = lang_node.attrib['lang_code']
                    sub_formats = []
                    for ext in self._SUBTITLE_FORMATS:
                        params = compat_urllib_parse_urlencode({
                            'lang': original_lang,
                            'tlang': sub_lang,
                            'fmt': ext,
                            'ts': timestamp,
                            'kind': caption_kind,
                        })
                        sub_formats.append({
                            'url': caption_url + '&' + params,
                            'ext': ext,
                        })
                    sub_lang_list[sub_lang] = sub_formats
                return sub_lang_list

            # Some videos don't provide ttsurl but rather caption_tracks and
            # caption_translation_languages (e.g. 20LmZk1hakA)
            caption_tracks = args['caption_tracks']
            caption_translation_languages = args['caption_translation_languages']
            # Only the first caption track is used; its 'u' field is the base URL
            caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
            parsed_caption_url = compat_urllib_parse_urlparse(caption_url)
            caption_qs = compat_parse_qs(parsed_caption_url.query)

            sub_lang_list = {}
            for lang in caption_translation_languages.split(','):
                lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang))
                sub_lang = lang_qs.get('lc', [None])[0]
                if not sub_lang:
                    continue
                sub_formats = []
                for ext in self._SUBTITLE_FORMATS:
                    # caption_qs is reused; 'tlang' and 'fmt' are overwritten
                    # on every iteration before the URL is rebuilt
                    caption_qs.update({
                        'tlang': [sub_lang],
                        'fmt': [ext],
                    })
                    sub_url = compat_urlparse.urlunparse(parsed_caption_url._replace(
                        query=compat_urllib_parse_urlencode(caption_qs, True)))
                    sub_formats.append({
                        'url': sub_url,
                        'ext': ext,
                    })
                sub_lang_list[sub_lang] = sub_formats
            return sub_lang_list
        # An extractor error can be raised by the download process if there are
        # no automatic captions but there are subtitles; a KeyError means an
        # expected field is missing from the player config or the caption list
        except (KeyError, ExtractorError):
            self._downloader.report_warning(err_msg)
            return {}
1129
1130     def _mark_watched(self, video_id, video_info):
1131         playback_url = video_info.get('videostats_playback_base_url', [None])[0]
1132         if not playback_url:
1133             return
1134         parsed_playback_url = compat_urlparse.urlparse(playback_url)
1135         qs = compat_urlparse.parse_qs(parsed_playback_url.query)
1136
1137         # cpn generation algorithm is reverse engineered from base.js.
1138         # In fact it works even with dummy cpn.
1139         CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
1140         cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
1141
1142         qs.update({
1143             'ver': ['2'],
1144             'cpn': [cpn],
1145         })
1146         playback_url = compat_urlparse.urlunparse(
1147             parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
1148
1149         self._download_webpage(
1150             playback_url, video_id, 'Marking watched',
1151             'Unable to mark watched', fatal=False)
1152
1153     @classmethod
1154     def extract_id(cls, url):
1155         mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
1156         if mobj is None:
1157             raise ExtractorError('Invalid URL: %s' % url)
1158         video_id = mobj.group(2)
1159         return video_id
1160
1161     def _extract_from_m3u8(self, manifest_url, video_id):
1162         url_map = {}
1163
1164         def _get_urls(_manifest):
1165             lines = _manifest.split('\n')
1166             urls = filter(lambda l: l and not l.startswith('#'),
1167                           lines)
1168             return urls
1169         manifest = self._download_webpage(manifest_url, video_id, 'Downloading formats manifest')
1170         formats_urls = _get_urls(manifest)
1171         for format_url in formats_urls:
1172             itag = self._search_regex(r'itag/(\d+?)/', format_url, 'itag')
1173             url_map[itag] = format_url
1174         return url_map
1175
1176     def _extract_annotations(self, video_id):
1177         url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id
1178         return self._download_webpage(url, video_id, note='Searching for annotations.', errnote='Unable to download video annotations.')
1179
1180     def _real_extract(self, url):
1181         url, smuggled_data = unsmuggle_url(url, {})
1182
1183         proto = (
1184             'http' if self._downloader.params.get('prefer_insecure', False)
1185             else 'https')
1186
1187         start_time = None
1188         end_time = None
1189         parsed_url = compat_urllib_parse_urlparse(url)
1190         for component in [parsed_url.fragment, parsed_url.query]:
1191             query = compat_parse_qs(component)
1192             if start_time is None and 't' in query:
1193                 start_time = parse_duration(query['t'][0])
1194             if start_time is None and 'start' in query:
1195                 start_time = parse_duration(query['start'][0])
1196             if end_time is None and 'end' in query:
1197                 end_time = parse_duration(query['end'][0])
1198
1199         # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1200         mobj = re.search(self._NEXT_URL_RE, url)
1201         if mobj:
1202             url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
1203         video_id = self.extract_id(url)
1204
1205         # Get video webpage
1206         url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
1207         video_webpage = self._download_webpage(url, video_id)
1208
1209         # Attempt to extract SWF player URL
1210         mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
1211         if mobj is not None:
1212             player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1213         else:
1214             player_url = None
1215
1216         dash_mpds = []
1217
1218         def add_dash_mpd(video_info):
1219             dash_mpd = video_info.get('dashmpd')
1220             if dash_mpd and dash_mpd[0] not in dash_mpds:
1221                 dash_mpds.append(dash_mpd[0])
1222
1223         # Get video info
1224         embed_webpage = None
1225         is_live = None
1226         if re.search(r'player-age-gate-content">', video_webpage) is not None:
1227             age_gate = True
1228             # We simulate the access to the video from www.youtube.com/v/{video_id}
1229             # this can be viewed without login into Youtube
1230             url = proto + '://www.youtube.com/embed/%s' % video_id
1231             embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
1232             data = compat_urllib_parse_urlencode({
1233                 'video_id': video_id,
1234                 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1235                 'sts': self._search_regex(
1236                     r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
1237             })
1238             video_info_url = proto + '://www.youtube.com/get_video_info?' + data
1239             video_info_webpage = self._download_webpage(
1240                 video_info_url, video_id,
1241                 note='Refetching age-gated info webpage',
1242                 errnote='unable to download video info webpage')
1243             video_info = compat_parse_qs(video_info_webpage)
1244             add_dash_mpd(video_info)
1245         else:
1246             age_gate = False
1247             video_info = None
1248             # Try looking directly into the video webpage
1249             ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
1250             if ytplayer_config:
1251                 args = ytplayer_config['args']
1252                 if args.get('url_encoded_fmt_stream_map'):
1253                     # Convert to the same format returned by compat_parse_qs
1254                     video_info = dict((k, [v]) for k, v in args.items())
1255                     add_dash_mpd(video_info)
1256                 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1257                     is_live = True
1258             if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
1259                 # We also try looking in get_video_info since it may contain different dashmpd
1260                 # URL that points to a DASH manifest with possibly different itag set (some itags
1261                 # are missing from DASH manifest pointed by webpage's dashmpd, some - from DASH
1262                 # manifest pointed by get_video_info's dashmpd).
1263                 # The general idea is to take a union of itags of both DASH manifests (for example
1264                 # video with such 'manifest behavior' see https://github.com/rg3/youtube-dl/issues/6093)
1265                 self.report_video_info_webpage_download(video_id)
1266                 for el_type in ['&el=info', '&el=embedded', '&el=detailpage', '&el=vevo', '']:
1267                     video_info_url = (
1268                         '%s://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
1269                         % (proto, video_id, el_type))
1270                     video_info_webpage = self._download_webpage(
1271                         video_info_url,
1272                         video_id, note=False,
1273                         errnote='unable to download video info webpage')
1274                     get_video_info = compat_parse_qs(video_info_webpage)
1275                     if get_video_info.get('use_cipher_signature') != ['True']:
1276                         add_dash_mpd(get_video_info)
1277                     if not video_info:
1278                         video_info = get_video_info
1279                     if 'token' in get_video_info:
1280                         # Different get_video_info requests may report different results, e.g.
1281                         # some may report video unavailability, but some may serve it without
1282                         # any complaint (see https://github.com/rg3/youtube-dl/issues/7362,
1283                         # the original webpage as well as el=info and el=embedded get_video_info
1284                         # requests report video unavailability due to geo restriction while
1285                         # el=detailpage succeeds and returns valid data). This is probably
1286                         # due to YouTube measures against IP ranges of hosting providers.
1287                         # Working around by preferring the first succeeded video_info containing
1288                         # the token if no such video_info yet was found.
1289                         if 'token' not in video_info:
1290                             video_info = get_video_info
1291                         break
1292         if 'token' not in video_info:
1293             if 'reason' in video_info:
1294                 if 'The uploader has not made this video available in your country.' in video_info['reason']:
1295                     regions_allowed = self._html_search_meta('regionsAllowed', video_webpage, default=None)
1296                     if regions_allowed:
1297                         raise ExtractorError('YouTube said: This video is available in %s only' % (
1298                             ', '.join(map(ISO3166Utils.short2full, regions_allowed.split(',')))),
1299                             expected=True)
1300                 raise ExtractorError(
1301                     'YouTube said: %s' % video_info['reason'][0],
1302                     expected=True, video_id=video_id)
1303             else:
1304                 raise ExtractorError(
1305                     '"token" parameter not in video info for unknown reason',
1306                     video_id=video_id)
1307
1308         # title
1309         if 'title' in video_info:
1310             video_title = video_info['title'][0]
1311         else:
1312             self._downloader.report_warning('Unable to extract video title')
1313             video_title = '_'
1314
1315         # description
1316         video_description = get_element_by_id("eow-description", video_webpage)
1317         if video_description:
1318             video_description = re.sub(r'''(?x)
1319                 <a\s+
1320                     (?:[a-zA-Z-]+="[^"]*"\s+)*?
1321                     (?:title|href)="([^"]+)"\s+
1322                     (?:[a-zA-Z-]+="[^"]*"\s+)*?
1323                     class="[^"]*"[^>]*>
1324                 [^<]+\.{3}\s*
1325                 </a>
1326             ''', r'\1', video_description)
1327             video_description = clean_html(video_description)
1328         else:
1329             fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
1330             if fd_mobj:
1331                 video_description = unescapeHTML(fd_mobj.group(1))
1332             else:
1333                 video_description = ''
1334
1335         if 'multifeed_metadata_list' in video_info and not smuggled_data.get('force_singlefeed', False):
1336             if not self._downloader.params.get('noplaylist'):
1337                 entries = []
1338                 feed_ids = []
1339                 multifeed_metadata_list = video_info['multifeed_metadata_list'][0]
1340                 for feed in multifeed_metadata_list.split(','):
1341                     # Unquote should take place before split on comma (,) since textual
1342                     # fields may contain comma as well (see
1343                     # https://github.com/rg3/youtube-dl/issues/8536)
1344                     feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
1345                     entries.append({
1346                         '_type': 'url_transparent',
1347                         'ie_key': 'Youtube',
1348                         'url': smuggle_url(
1349                             '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
1350                             {'force_singlefeed': True}),
1351                         'title': '%s (%s)' % (video_title, feed_data['title'][0]),
1352                     })
1353                     feed_ids.append(feed_data['id'][0])
1354                 self.to_screen(
1355                     'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
1356                     % (', '.join(feed_ids), video_id))
1357                 return self.playlist_result(entries, video_id, video_title, video_description)
1358             self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
1359
1360         if 'view_count' in video_info:
1361             view_count = int(video_info['view_count'][0])
1362         else:
1363             view_count = None
1364
1365         # Check for "rental" videos
1366         if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
1367             raise ExtractorError('"rental" videos not supported')
1368
1369         # Start extracting information
1370         self.report_information_extraction(video_id)
1371
1372         # uploader
1373         if 'author' not in video_info:
1374             raise ExtractorError('Unable to extract uploader name')
1375         video_uploader = compat_urllib_parse_unquote_plus(video_info['author'][0])
1376
1377         # uploader_id
1378         video_uploader_id = None
1379         video_uploader_url = None
1380         mobj = re.search(
1381             r'<link itemprop="url" href="(?P<uploader_url>https?://www.youtube.com/(?:user|channel)/(?P<uploader_id>[^"]+))">',
1382             video_webpage)
1383         if mobj is not None:
1384             video_uploader_id = mobj.group('uploader_id')
1385             video_uploader_url = mobj.group('uploader_url')
1386         else:
1387             self._downloader.report_warning('unable to extract uploader nickname')
1388
1389         # thumbnail image
1390         # We try first to get a high quality image:
1391         m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
1392                             video_webpage, re.DOTALL)
1393         if m_thumb is not None:
1394             video_thumbnail = m_thumb.group(1)
1395         elif 'thumbnail_url' not in video_info:
1396             self._downloader.report_warning('unable to extract video thumbnail')
1397             video_thumbnail = None
1398         else:   # don't panic if we can't find it
1399             video_thumbnail = compat_urllib_parse_unquote_plus(video_info['thumbnail_url'][0])
1400
1401         # upload date
1402         upload_date = self._html_search_meta(
1403             'datePublished', video_webpage, 'upload date', default=None)
1404         if not upload_date:
1405             upload_date = self._search_regex(
1406                 [r'(?s)id="eow-date.*?>(.*?)</span>',
1407                  r'id="watch-uploader-info".*?>.*?(?:Published|Uploaded|Streamed live|Started) on (.+?)</strong>'],
1408                 video_webpage, 'upload date', default=None)
1409             if upload_date:
1410                 upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
1411         upload_date = unified_strdate(upload_date)
1412
1413         video_license = self._html_search_regex(
1414             r'<h4[^>]+class="title"[^>]*>\s*License\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li',
1415             video_webpage, 'license', default=None)
1416
1417         m_music = re.search(
1418             r'<h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*<ul[^>]*>\s*<li>(?P<title>.+?) by (?P<creator>.+?)(?:\(.+?\))?</li',
1419             video_webpage)
1420         if m_music:
1421             video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
1422             video_creator = clean_html(m_music.group('creator'))
1423         else:
1424             video_alt_title = video_creator = None
1425
1426         m_cat_container = self._search_regex(
1427             r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
1428             video_webpage, 'categories', default=None)
1429         if m_cat_container:
1430             category = self._html_search_regex(
1431                 r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
1432                 default=None)
1433             video_categories = None if category is None else [category]
1434         else:
1435             video_categories = None
1436
1437         video_tags = [
1438             unescapeHTML(m.group('content'))
1439             for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
1440
1441         def _extract_count(count_name):
1442             return str_to_int(self._search_regex(
1443                 r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>'
1444                 % re.escape(count_name),
1445                 video_webpage, count_name, default=None))
1446
1447         like_count = _extract_count('like')
1448         dislike_count = _extract_count('dislike')
1449
1450         # subtitles
1451         video_subtitles = self.extract_subtitles(video_id, video_webpage)
1452         automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
1453
1454         if 'length_seconds' not in video_info:
1455             self._downloader.report_warning('unable to extract video duration')
1456             video_duration = None
1457         else:
1458             video_duration = int(compat_urllib_parse_unquote_plus(video_info['length_seconds'][0]))
1459
1460         # annotations
1461         video_annotations = None
1462         if self._downloader.params.get('writeannotations', False):
1463             video_annotations = self._extract_annotations(video_id)
1464
1465         def _map_to_format_list(urlmap):
1466             formats = []
1467             for itag, video_real_url in urlmap.items():
1468                 dct = {
1469                     'format_id': itag,
1470                     'url': video_real_url,
1471                     'player_url': player_url,
1472                 }
1473                 if itag in self._formats:
1474                     dct.update(self._formats[itag])
1475                 formats.append(dct)
1476             return formats
1477
1478         if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1479             self.report_rtmp_download()
1480             formats = [{
1481                 'format_id': '_rtmp',
1482                 'protocol': 'rtmp',
1483                 'url': video_info['conn'][0],
1484                 'player_url': player_url,
1485             }]
1486         elif len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1:
1487             encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
1488             if 'rtmpe%3Dyes' in encoded_url_map:
1489                 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
1490             formats_spec = {}
1491             fmt_list = video_info.get('fmt_list', [''])[0]
1492             if fmt_list:
1493                 for fmt in fmt_list.split(','):
1494                     spec = fmt.split('/')
1495                     if len(spec) > 1:
1496                         width_height = spec[1].split('x')
1497                         if len(width_height) == 2:
1498                             formats_spec[spec[0]] = {
1499                                 'resolution': spec[1],
1500                                 'width': int_or_none(width_height[0]),
1501                                 'height': int_or_none(width_height[1]),
1502                             }
1503             formats = []
1504             for url_data_str in encoded_url_map.split(','):
1505                 url_data = compat_parse_qs(url_data_str)
1506                 if 'itag' not in url_data or 'url' not in url_data:
1507                     continue
1508                 format_id = url_data['itag'][0]
1509                 url = url_data['url'][0]
1510
1511                 if 'sig' in url_data:
1512                     url += '&signature=' + url_data['sig'][0]
1513                 elif 's' in url_data:
1514                     encrypted_sig = url_data['s'][0]
1515                     ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
1516
1517                     jsplayer_url_json = self._search_regex(
1518                         ASSETS_RE,
1519                         embed_webpage if age_gate else video_webpage,
1520                         'JS player URL (1)', default=None)
1521                     if not jsplayer_url_json and not age_gate:
1522                         # We need the embed website after all
1523                         if embed_webpage is None:
1524                             embed_url = proto + '://www.youtube.com/embed/%s' % video_id
1525                             embed_webpage = self._download_webpage(
1526                                 embed_url, video_id, 'Downloading embed webpage')
1527                         jsplayer_url_json = self._search_regex(
1528                             ASSETS_RE, embed_webpage, 'JS player URL')
1529
1530                     player_url = json.loads(jsplayer_url_json)
1531                     if player_url is None:
1532                         player_url_json = self._search_regex(
1533                             r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
1534                             video_webpage, 'age gate player URL')
1535                         player_url = json.loads(player_url_json)
1536
1537                     if self._downloader.params.get('verbose'):
1538                         if player_url is None:
1539                             player_version = 'unknown'
1540                             player_desc = 'unknown'
1541                         else:
1542                             if player_url.endswith('swf'):
1543                                 player_version = self._search_regex(
1544                                     r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
1545                                     'flash player', fatal=False)
1546                                 player_desc = 'flash player %s' % player_version
1547                             else:
1548                                 player_version = self._search_regex(
1549                                     [r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js', r'(?:www|player)-([^/]+)/base\.js'],
1550                                     player_url,
1551                                     'html5 player', fatal=False)
1552                                 player_desc = 'html5 player %s' % player_version
1553
1554                         parts_sizes = self._signature_cache_id(encrypted_sig)
1555                         self.to_screen('{%s} signature length %s, %s' %
1556                                        (format_id, parts_sizes, player_desc))
1557
1558                     signature = self._decrypt_signature(
1559                         encrypted_sig, video_id, player_url, age_gate)
1560                     url += '&signature=' + signature
1561                 if 'ratebypass' not in url:
1562                     url += '&ratebypass=yes'
1563
1564                 dct = {
1565                     'format_id': format_id,
1566                     'url': url,
1567                     'player_url': player_url,
1568                 }
1569                 if format_id in self._formats:
1570                     dct.update(self._formats[format_id])
1571                 if format_id in formats_spec:
1572                     dct.update(formats_spec[format_id])
1573
1574                 # Some itags are not included in DASH manifest thus corresponding formats will
1575                 # lack metadata (see https://github.com/rg3/youtube-dl/pull/5993).
1576                 # Trying to extract metadata from url_encoded_fmt_stream_map entry.
1577                 mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
1578                 width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
1579
1580                 more_fields = {
1581                     'filesize': int_or_none(url_data.get('clen', [None])[0]),
1582                     'tbr': float_or_none(url_data.get('bitrate', [None])[0], 1000),
1583                     'width': width,
1584                     'height': height,
1585                     'fps': int_or_none(url_data.get('fps', [None])[0]),
1586                     'format_note': url_data.get('quality_label', [None])[0] or url_data.get('quality', [None])[0],
1587                 }
1588                 for key, value in more_fields.items():
1589                     if value:
1590                         dct[key] = value
1591                 type_ = url_data.get('type', [None])[0]
1592                 if type_:
1593                     type_split = type_.split(';')
1594                     kind_ext = type_split[0].split('/')
1595                     if len(kind_ext) == 2:
1596                         kind, _ = kind_ext
1597                         dct['ext'] = mimetype2ext(type_split[0])
1598                         if kind in ('audio', 'video'):
1599                             codecs = None
1600                             for mobj in re.finditer(
1601                                     r'(?P<key>[a-zA-Z_-]+)=(?P<quote>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):
1602                                 if mobj.group('key') == 'codecs':
1603                                     codecs = mobj.group('val')
1604                                     break
1605                             if codecs:
1606                                 codecs = codecs.split(',')
1607                                 if len(codecs) == 2:
1608                                     acodec, vcodec = codecs[1], codecs[0]
1609                                 else:
1610                                     acodec, vcodec = (codecs[0], 'none') if kind == 'audio' else ('none', codecs[0])
1611                                 dct.update({
1612                                     'acodec': acodec,
1613                                     'vcodec': vcodec,
1614                                 })
1615                 formats.append(dct)
1616         elif video_info.get('hlsvp'):
1617             manifest_url = video_info['hlsvp'][0]
1618             url_map = self._extract_from_m3u8(manifest_url, video_id)
1619             formats = _map_to_format_list(url_map)
1620             # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
1621             for a_format in formats:
1622                 a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
1623         else:
1624             unavailable_message = self._html_search_regex(
1625                 r'(?s)<h1[^>]+id="unavailable-message"[^>]*>(.+?)</h1>',
1626                 video_webpage, 'unavailable message', default=None)
1627             if unavailable_message:
1628                 raise ExtractorError(unavailable_message, expected=True)
1629             raise ExtractorError('no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
1630
1631         # Look for the DASH manifest
1632         if self._downloader.params.get('youtube_include_dash_manifest', True):
1633             dash_mpd_fatal = True
1634             for mpd_url in dash_mpds:
1635                 dash_formats = {}
1636                 try:
1637                     def decrypt_sig(mobj):
1638                         s = mobj.group(1)
1639                         dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
1640                         return '/signature/%s' % dec_s
1641
1642                     mpd_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, mpd_url)
1643
1644                     for df in self._extract_mpd_formats(
1645                             mpd_url, video_id, fatal=dash_mpd_fatal,
1646                             formats_dict=self._formats):
1647                         # Do not overwrite DASH format found in some previous DASH manifest
1648                         if df['format_id'] not in dash_formats:
1649                             dash_formats[df['format_id']] = df
1650                         # Additional DASH manifests may end up in HTTP Error 403 therefore
1651                         # allow them to fail without bug report message if we already have
1652                         # some DASH manifest succeeded. This is temporary workaround to reduce
1653                         # burst of bug reports until we figure out the reason and whether it
1654                         # can be fixed at all.
1655                         dash_mpd_fatal = False
1656                 except (ExtractorError, KeyError) as e:
1657                     self.report_warning(
1658                         'Skipping DASH manifest: %r' % e, video_id)
1659                 if dash_formats:
1660                     # Remove the formats we found through non-DASH, they
1661                     # contain less info and it can be wrong, because we use
1662                     # fixed values (for example the resolution). See
1663                     # https://github.com/rg3/youtube-dl/issues/5774 for an
1664                     # example.
1665                     formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
1666                     formats.extend(dash_formats.values())
1667
1668         # Check for malformed aspect ratio
1669         stretched_m = re.search(
1670             r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
1671             video_webpage)
1672         if stretched_m:
1673             w = float(stretched_m.group('w'))
1674             h = float(stretched_m.group('h'))
1675             # yt:stretch may hold invalid ratio data (e.g. for Q39EVAstoRM ratio is 17:0).
1676             # We will only process correct ratios.
1677             if w > 0 and h > 0:
1678                 ratio = w / h
1679                 for f in formats:
1680                     if f.get('vcodec') != 'none':
1681                         f['stretched_ratio'] = ratio
1682
1683         self._sort_formats(formats)
1684
1685         self.mark_watched(video_id, video_info)
1686
1687         return {
1688             'id': video_id,
1689             'uploader': video_uploader,
1690             'uploader_id': video_uploader_id,
1691             'uploader_url': video_uploader_url,
1692             'upload_date': upload_date,
1693             'license': video_license,
1694             'creator': video_creator,
1695             'title': video_title,
1696             'alt_title': video_alt_title,
1697             'thumbnail': video_thumbnail,
1698             'description': video_description,
1699             'categories': video_categories,
1700             'tags': video_tags,
1701             'subtitles': video_subtitles,
1702             'automatic_captions': automatic_captions,
1703             'duration': video_duration,
1704             'age_limit': 18 if age_gate else 0,
1705             'annotations': video_annotations,
1706             'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
1707             'view_count': view_count,
1708             'like_count': like_count,
1709             'dislike_count': dislike_count,
1710             'average_rating': float_or_none(video_info.get('avg_rating', [None])[0]),
1711             'formats': formats,
1712             'is_live': is_live,
1713             'start_time': start_time,
1714             'end_time': end_time,
1715         }
1716
1717
class YoutubeSharedVideoIE(InfoExtractor):
    """Resolve ``youtube.com/shared?ci=...`` links to the video they point at.

    The shared page carries the real video id in its ``videoId`` meta tag;
    extraction of the video itself is delegated to YoutubeIE.
    """
    IE_NAME = 'youtube:shared'
    _VALID_URL = r'(?:https?:)?//(?:www\.)?youtube\.com/shared\?.*\bci=(?P<id>[0-9A-Za-z_-]{11})'

    _TEST = {
        'url': 'https://www.youtube.com/shared?ci=1nEzmT-M4fU',
        'info_dict': {
            'id': 'uPDB5I9wfp8',
            'ext': 'webm',
            'title': 'Pocoyo: 90 minutos de episódios completos Português para crianças - PARTE 3',
            'description': 'md5:d9e4d9346a2dfff4c7dc4c8cec0f546d',
            'upload_date': '20160219',
            'uploader': 'Pocoyo - Português (BR)',
            'uploader_id': 'PocoyoBrazil',
        },
        'add_ie': ['Youtube'],
        'params': {
            # There are already too many Youtube downloads
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Return a url result pointing YoutubeIE at the real video id."""
        shared_id = self._match_id(url)
        shared_page = self._download_webpage(url, shared_id)

        # fatal=True: without the meta tag there is nothing to delegate to
        target_video_id = self._html_search_meta(
            'videoId', shared_page, 'YouTube video id', fatal=True)

        youtube_ie_key = YoutubeIE.ie_key()
        return self.url_result(target_video_id, youtube_ie_key)
1749
1750
1751 class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
1752     IE_DESC = 'YouTube.com playlists'
1753     _VALID_URL = r"""(?x)(?:
1754                         (?:https?://)?
1755                         (?:\w+\.)?
1756                         youtube\.com/
1757                         (?:
1758                            (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/videoseries)
1759                            \? (?:.*?[&;])*? (?:p|a|list)=
1760                         |  p/
1761                         )
1762                         (
1763                             (?:PL|LL|EC|UU|FL|RD|UL)?[0-9A-Za-z-_]{10,}
1764                             # Top tracks, they can also include dots
1765                             |(?:MC)[\w\.]*
1766                         )
1767                         .*
1768                      |
1769                         ((?:PL|LL|EC|UU|FL|RD|UL)[0-9A-Za-z-_]{10,})
1770                      )"""
1771     _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
1772     _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&amp;[^"]*?index=(?P<index>\d+)(?:[^>]+>(?P<title>[^<]+))?'
1773     IE_NAME = 'youtube:playlist'
1774     _TESTS = [{
1775         'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
1776         'info_dict': {
1777             'title': 'ytdl test PL',
1778             'id': 'PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
1779         },
1780         'playlist_count': 3,
1781     }, {
1782         'url': 'https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
1783         'info_dict': {
1784             'id': 'PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
1785             'title': 'YDL_Empty_List',
1786         },
1787         'playlist_count': 0,
1788     }, {
1789         'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
1790         'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
1791         'info_dict': {
1792             'title': '29C3: Not my department',
1793             'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
1794         },
1795         'playlist_count': 95,
1796     }, {
1797         'note': 'issue #673',
1798         'url': 'PLBB231211A4F62143',
1799         'info_dict': {
1800             'title': '[OLD]Team Fortress 2 (Class-based LP)',
1801             'id': 'PLBB231211A4F62143',
1802         },
1803         'playlist_mincount': 26,
1804     }, {
1805         'note': 'Large playlist',
1806         'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
1807         'info_dict': {
1808             'title': 'Uploads from Cauchemar',
1809             'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
1810         },
1811         'playlist_mincount': 799,
1812     }, {
1813         'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
1814         'info_dict': {
1815             'title': 'YDL_safe_search',
1816             'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
1817         },
1818         'playlist_count': 2,
1819     }, {
1820         'note': 'embedded',
1821         'url': 'http://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
1822         'playlist_count': 4,
1823         'info_dict': {
1824             'title': 'JODA15',
1825             'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
1826         }
1827     }, {
1828         'note': 'Embedded SWF player',
1829         'url': 'http://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
1830         'playlist_count': 4,
1831         'info_dict': {
1832             'title': 'JODA7',
1833             'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ',
1834         }
1835     }, {
1836         'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
1837         'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
1838         'info_dict': {
1839             'title': 'Uploads from Interstellar Movie',
1840             'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
1841         },
1842         'playlist_mincout': 21,
1843     }]
1844
1845     def _real_initialize(self):
1846         self._login()
1847
1848     def _extract_mix(self, playlist_id):
1849         # The mixes are generated from a single video
1850         # the id of the playlist is just 'RD' + video_id
1851         ids = []
1852         last_id = playlist_id[-11:]
1853         for n in itertools.count(1):
1854             url = 'https://youtube.com/watch?v=%s&list=%s' % (last_id, playlist_id)
1855             webpage = self._download_webpage(
1856                 url, playlist_id, 'Downloading page {0} of Youtube mix'.format(n))
1857             new_ids = orderedSet(re.findall(
1858                 r'''(?xs)data-video-username=".*?".*?
1859                            href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id),
1860                 webpage))
1861             # Fetch new pages until all the videos are repeated, it seems that
1862             # there are always 51 unique videos.
1863             new_ids = [_id for _id in new_ids if _id not in ids]
1864             if not new_ids:
1865                 break
1866             ids.extend(new_ids)
1867             last_id = ids[-1]
1868
1869         url_results = self._ids_to_results(ids)
1870
1871         search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage)
1872         title_span = (
1873             search_title('playlist-title') or
1874             search_title('title long-title') or
1875             search_title('title'))
1876         title = clean_html(title_span)
1877
1878         return self.playlist_result(url_results, playlist_id, title)
1879
1880     def _extract_playlist(self, playlist_id):
1881         url = self._TEMPLATE_URL % playlist_id
1882         page = self._download_webpage(url, playlist_id)
1883
1884         for match in re.findall(r'<div class="yt-alert-message">([^<]+)</div>', page):
1885             match = match.strip()
1886             # Check if the playlist exists or is private
1887             if re.match(r'[^<]*(The|This) playlist (does not exist|is private)[^<]*', match):
1888                 raise ExtractorError(
1889                     'The playlist doesn\'t exist or is private, use --username or '
1890                     '--netrc to access it.',
1891                     expected=True)
1892             elif re.match(r'[^<]*Invalid parameters[^<]*', match):
1893                 raise ExtractorError(
1894                     'Invalid parameters. Maybe URL is incorrect.',
1895                     expected=True)
1896             elif re.match(r'[^<]*Choose your language[^<]*', match):
1897                 continue
1898             else:
1899                 self.report_warning('Youtube gives an alert message: ' + match)
1900
1901         playlist_title = self._html_search_regex(
1902             r'(?s)<h1 class="pl-header-title[^"]*"[^>]*>\s*(.*?)\s*</h1>',
1903             page, 'title')
1904
1905         return self.playlist_result(self._entries(page, playlist_id), playlist_id, playlist_title)
1906
1907     def _check_download_just_video(self, url, playlist_id):
1908         # Check if it's a video-specific URL
1909         query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
1910         if 'v' in query_dict:
1911             video_id = query_dict['v'][0]
1912             if self._downloader.params.get('noplaylist'):
1913                 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
1914                 return self.url_result(video_id, 'Youtube', video_id=video_id)
1915             else:
1916                 self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
1917
1918     def _real_extract(self, url):
1919         # Extract playlist id
1920         mobj = re.match(self._VALID_URL, url)
1921         if mobj is None:
1922             raise ExtractorError('Invalid URL: %s' % url)
1923         playlist_id = mobj.group(1) or mobj.group(2)
1924
1925         video = self._check_download_just_video(url, playlist_id)
1926         if video:
1927             return video
1928
1929         if playlist_id.startswith(('RD', 'UL', 'PU')):
1930             # Mixes require a custom extraction process
1931             return self._extract_mix(playlist_id)
1932
1933         return self._extract_playlist(playlist_id)
1934
1935
class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
    # Extractor for /channel/<id> URLs.
    IE_NAME = 'youtube:channel'
    IE_DESC = 'YouTube.com channels'
    _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/(?P<id>[0-9A-Za-z_-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos'
    _VIDEO_RE = r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?'
    _TESTS = [{
        'note': 'paginated channel',
        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
        'playlist_mincount': 91,
        'info_dict': {
            'id': 'UUKfVa3S1e4PHvxWcwyMMg8w',
            'title': 'Uploads from lex will',
        }
    }, {
        'note': 'Age restricted channel',
        # from https://www.youtube.com/user/DeusExOfficial
        'url': 'https://www.youtube.com/channel/UCs0ifCMCm1icqRbqhUINa0w',
        'playlist_mincount': 64,
        'info_dict': {
            'id': 'UUs0ifCMCm1icqRbqhUINa0w',
            'title': 'Uploads from Deus Ex',
        },
    }]

    @classmethod
    def suitable(cls, url):
        """Decline URLs that the playlists or live extractors accept."""
        if YoutubePlaylistsIE.suitable(url) or YoutubeLiveIE.suitable(url):
            return False
        return super(YoutubeChannelIE, cls).suitable(url)

    def _build_template_url(self, url, channel_id):
        """Return the URL of the channel's videos page."""
        return self._TEMPLATE_URL % channel_id

    def _real_extract(self, url):
        """Return the channel's videos, preferring its uploads playlist when it can be found."""
        channel_id = self._match_id(url)
        url = self._build_template_url(url, channel_id)

        # Channel by page listing is restricted to 35 pages of 30 items, i.e. 1050 videos total (see #5778)
        # Workaround by extracting as a playlist if managed to obtain channel playlist URL
        # otherwise fallback on channel by page extraction
        channel_playlist_id = False
        channel_page = self._download_webpage(
            url + '?view=57', channel_id,
            'Downloading channel page', fatal=False)
        if channel_page is not False:
            channel_playlist_id = self._html_search_meta(
                'channelId', channel_page, 'channel id', default=None)
            if not channel_playlist_id:
                # Fall back to the app deep-link meta tags.
                channel_url = self._html_search_meta(
                    ('al:ios:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad'),
                    channel_page, 'channel url', default=None)
                if channel_url:
                    channel_playlist_id = self._search_regex(
                        r'vnd\.youtube://user/([0-9A-Za-z_-]+)',
                        channel_url, 'channel id', default=None)
        if channel_playlist_id and channel_playlist_id.startswith('UC'):
            # Swap the 'UC' prefix for 'UU' and delegate to the playlist extractor.
            uploads_id = 'UU' + channel_playlist_id[2:]
            uploads_url = compat_urlparse.urljoin(url, '/playlist?list=%s' % uploads_id)
            return self.url_result(uploads_url, 'YoutubePlaylist')

        channel_page = self._download_webpage(url, channel_id, 'Downloading page #1')
        autogenerated_match = re.search(r'''(?x)
                class="[^"]*?(?:
                    channel-header-autogenerated-label|
                    yt-channel-title-autogenerated
                )[^"]*"''', channel_page)

        if autogenerated_match is not None:
            # The videos are contained in a single page
            # the ajax pages can't be used, they are empty
            entries = []
            for video_id, video_title in self.extract_videos_from_page(channel_page):
                entries.append(self.url_result(
                    video_id, 'Youtube', video_id=video_id,
                    video_title=video_title))
            return self.playlist_result(entries, channel_id)

        # Probe for a first entry; if there is none, surface any alert the page shows.
        try:
            next(self._entries(channel_page, channel_id))
        except StopIteration:
            alert_message = self._html_search_regex(
                r'(?s)<div[^>]+class=(["\']).*?\byt-alert-message\b.*?\1[^>]*>(?P<alert>[^<]+)</div>',
                channel_page, 'alert', default=None, group='alert')
            if alert_message:
                raise ExtractorError('Youtube said: %s' % alert_message, expected=True)

        return self.playlist_result(self._entries(channel_page, channel_id), channel_id)
2025
2026
class YoutubeUserIE(YoutubeChannelIE):
    # Extractor for /user/<name>, /c/<name>, bare /<name> URLs and the
    # "ytuser:" prefix.
    IE_NAME = 'youtube:user'
    IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:(?P<user>user|c)/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/%s/%s/videos'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheLinuxFoundation',
        'playlist_mincount': 320,
        'info_dict': {
            'id': 'UUfX55Sx5hEFjoC3cNs6mCUQ',
            'title': 'Uploads from The Linux Foundation',
        }
    }, {
        # Only available via https://www.youtube.com/c/12minuteathlete/videos
        # but not https://www.youtube.com/user/12minuteathlete/videos
        'url': 'https://www.youtube.com/c/12minuteathlete/videos',
        'playlist_mincount': 249,
        'info_dict': {
            'id': 'UUVjM-zV6_opMDx7WYxnjZiQ',
            'title': 'Uploads from 12 Minute Athlete',
        }
    }, {
        'url': 'ytuser:phihag',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/gametrailers',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/gametrailers',
        'only_matching': True,
    }, {
        # This channel is not available.
        'url': 'https://www.youtube.com/user/kananishinoSMEJ/videos',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Accept the URL only if no other Youtube*IE class in this module does."""
        # This extractor's regex is very permissive, so it would also match
        # URLs that belong to the other youtube extractors; let them win.
        for name, klass in globals().items():
            if klass is cls or not (name.startswith('Youtube') and name.endswith('IE')):
                continue
            if klass.suitable(url):
                return False
        return super(YoutubeUserIE, cls).suitable(url)

    def _build_template_url(self, url, channel_id):
        """Return the videos page URL, using the matched 'user'/'c' path segment (default 'user')."""
        mobj = re.match(self._VALID_URL, url)
        path_kind = mobj.group('user') or 'user'
        return self._TEMPLATE_URL % (path_kind, mobj.group('id'))
2077
2078
class YoutubeLiveIE(YoutubeBaseInfoExtractor):
    # Extractor for the /live page of a user or channel.
    IE_DESC = 'YouTube.com live streams'
    _VALID_URL = r'(?P<base_url>https?://(?:\w+\.)?youtube\.com/(?:user|channel)/(?P<id>[^/]+))/live'
    IE_NAME = 'youtube:live'

    _TESTS = [{
        'url': 'http://www.youtube.com/user/TheYoungTurks/live',
        'info_dict': {
            'id': 'a48o2S1cPoo',
            'ext': 'mp4',
            'title': 'The Young Turks - Live Main Show',
            'uploader': 'The Young Turks',
            'uploader_id': 'TheYoungTurks',
            # Raw string: the pattern contains regex escapes ('\.') that are
            # not valid Python string escape sequences.
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
            'upload_date': '20150715',
            'license': 'Standard YouTube License',
            'description': 'md5:438179573adcdff3c97ebb1ee632b891',
            'categories': ['News & Politics'],
            'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'http://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Resolve a /live URL to the video it currently shows, else to the base user/channel URL."""
        mobj = re.match(self._VALID_URL, url)
        channel_id = mobj.group('id')
        base_url = mobj.group('base_url')
        # Non-fatal download: a failed request falls through to the base URL.
        webpage = self._download_webpage(url, channel_id, fatal=False)
        if webpage:
            page_type = self._og_search_property(
                'type', webpage, 'page type', default=None)
            video_id = self._html_search_meta(
                'videoId', webpage, 'video id', default=None)
            # Only hand over to the video extractor when the page is a video
            # page and the id has the expected 11-character shape.
            if page_type == 'video' and video_id and re.match(r'^[0-9A-Za-z_-]{11}$', video_id):
                return self.url_result(video_id, YoutubeIE.ie_key())
        return self.url_result(base_url)
2122
2123
class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
    # Extractor for the /playlists tab of a user or channel; this class only
    # supplies the URL pattern and metadata, the extraction logic is inherited.
    IE_NAME = 'youtube:playlists'
    IE_DESC = 'YouTube.com user/channel playlists'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/(?:user|channel)/(?P<id>[^/]+)/playlists'

    _TESTS = [{
        'url': 'http://www.youtube.com/user/ThirstForScience/playlists',
        'playlist_mincount': 4,
        'info_dict': {
            'id': 'ThirstForScience',
            'title': 'Thirst for Science',
        },
    }, {
        # with "Load more" button
        'url': 'http://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
        'playlist_mincount': 70,
        'info_dict': {
            'id': 'igorkle1',
            'title': 'Игорь Клейнер',
        },
    }, {
        'url': 'https://www.youtube.com/channel/UCiU1dHvZObB2iP6xkJ__Icw/playlists',
        'playlist_mincount': 17,
        'info_dict': {
            'id': 'UCiU1dHvZObB2iP6xkJ__Icw',
            'title': 'Chem Player',
        },
    }]
2152
2153
class YoutubeSearchIE(SearchInfoExtractor, YoutubePlaylistIE):
    # Extractor for "ytsearch" queries.
    IE_DESC = 'YouTube.com searches'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    # Extra query-string arguments merged into every results request;
    # subclasses override this to change the request.
    _EXTRA_QUERY_ARGS = {}
    _TESTS = []

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query.

        Result pages are downloaded one after another until a page yields no
        video ids or at least ``n`` videos have been collected; at most ``n``
        videos are returned.

        Raises ExtractorError (expected) when a result page carries the
        "search-message" marker.
        """
        videos = []

        for pagenum in itertools.count(1):
            url_query = {
                'search_query': query.encode('utf-8'),
                'page': pagenum,
                'spf': 'navigate',
            }
            url_query.update(self._EXTRA_QUERY_ARGS)
            result_url = 'https://www.youtube.com/results?' + compat_urllib_parse_urlencode(url_query)
            data = self._download_json(
                result_url, video_id='query "%s"' % query,
                note='Downloading page %s' % pagenum,
                errnote='Unable to download API page')
            html_content = data[1]['body']['content']

            if 'class="search-message' in html_content:
                raise ExtractorError(
                    '[youtube] No video results', expected=True)

            new_videos = self._ids_to_results(orderedSet(re.findall(
                r'href="/watch\?v=(.{11})', html_content)))
            videos += new_videos
            # Stop as soon as enough videos are available. Using '>=' avoids
            # downloading one more page whose results would only be discarded
            # when exactly n videos have been collected.
            if not new_videos or len(videos) >= n:
                break

        # n may be float('inf') (see _MAX_RESULTS), which cannot be used as a
        # slice bound, so only slice when truncation is actually needed.
        if len(videos) > n:
            videos = videos[:n]
        return self.playlist_result(videos, query)
2197
2198
class YoutubeSearchDateIE(YoutubeSearchIE):
    # Same as the plain search extractor, but adds the
    # 'search_sort=video_date_uploaded' argument to every results request.
    IE_NAME = '%s:date' % YoutubeSearchIE.IE_NAME
    IE_DESC = 'YouTube.com searches, newest videos first'
    _SEARCH_KEY = 'ytsearchdate'
    _EXTRA_QUERY_ARGS = {'search_sort': 'video_date_uploaded'}
2204
2205
class YoutubeSearchURLIE(YoutubePlaylistBaseInfoExtractor):
    # Extractor for /results?search_query=... (or ?q=...) URLs.
    IE_NAME = 'youtube:search_url'
    IE_DESC = 'YouTube.com search URLs'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)'
    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})(?:[^"]*"[^>]+\btitle="(?P<title>[^"]+))?'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Download the results page and return its videos as a playlist titled with the decoded query."""
        encoded_query = re.match(self._VALID_URL, url).group('query')
        query = compat_urllib_parse_unquote_plus(encoded_query)
        webpage = self._download_webpage(url, query)
        entries = self._process_page(webpage)
        return self.playlist_result(entries, playlist_title=query)
2227
2228
class YoutubeShowIE(YoutubePlaylistsBaseInfoExtractor):
    # Extractor for /show/<id> URLs.
    IE_NAME = 'youtube:show'
    IE_DESC = 'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://www\.youtube\.com/show/(?P<id>[^?#]*)'
    _TESTS = [{
        'url': 'https://www.youtube.com/show/airdisasters',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'airdisasters',
            'title': 'Air Disasters',
        }
    }]

    def _real_extract(self, url):
        """Delegate to the inherited extraction using the show's /playlists URL."""
        show_id = self._match_id(url)
        playlists_url = 'https://www.youtube.com/show/%s/playlists' % show_id
        return super(YoutubeShowIE, self)._real_extract(playlists_url)
2246
2247
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
    """
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        """Extractor name derived from the feed name ('youtube:<feed>')."""
        return 'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        """Run the login step when the extractor is initialized."""
        self._login()

    def _real_extract(self, url):
        """Collect the video ids of the feed page and its "load more" continuations.

        The ``url`` argument is not used to build the request: the feed page
        is always fetched from 'https://www.youtube.com/feed/<_FEED_NAME>'.
        Returns a playlist result titled with _PLAYLIST_TITLE.
        """
        page = self._download_webpage(
            'https://www.youtube.com/feed/%s' % self._FEED_NAME, self._PLAYLIST_TITLE)

        # The extraction process is the same as for playlists, but the regex
        # for the video ids doesn't contain an index
        ids = []
        more_widget_html = content_html = page
        for page_num in itertools.count(1):
            matches = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)

            # 'recommended' feed has infinite 'load more' and each new portion spins
            # the same videos in (sometimes) slightly different order, so we'll check
            # for unicity and break when portion has no new videos
            #
            # This must be a real list: on Python 3 filter() returns an
            # iterator, which is truthy even when it yields nothing, so the
            # emptiness check below would never stop the loop.
            new_ids = [video_id for video_id in orderedSet(matches) if video_id not in ids]
            if not new_ids:
                break

            ids.extend(new_ids)

            mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
            if not mobj:
                break

            more = self._download_json(
                'https://youtube.com/%s' % mobj.group('more'), self._PLAYLIST_TITLE,
                'Downloading page #%s' % page_num,
                transform_source=uppercase_escape)
            content_html = more['content_html']
            more_widget_html = more['load_more_widget_html']

        return self.playlist_result(
            self._ids_to_results(ids), playlist_title=self._PLAYLIST_TITLE)
2295
2296
class YoutubeWatchLaterIE(YoutubePlaylistIE):
    # Extractor for the "watch later" list; the playlist id is always 'WL'.
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    IE_NAME = 'youtube:watchlater'
    _VALID_URL = r'https?://www\.youtube\.com/(?:feed/watch_later|(?:playlist|watch)\?(?:.+&)?list=WL)|:ytwatchlater'

    _TESTS = [{
        'url': 'https://www.youtube.com/playlist?list=WL',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?v=bCNU9TrbiRk&index=1&list=WL',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the single video when --no-playlist applies, otherwise the 'WL' playlist."""
        single_video = self._check_download_just_video(url, 'WL')
        return single_video or self._extract_playlist('WL')
2315
2316
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    # Extractor for the favourites list of the logged-in account.
    IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
    IE_NAME = 'youtube:favorites'
    _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
    _LOGIN_REQUIRED = True

    def _real_extract(self, url):
        """Find the favourites playlist id on the my_favorites page and delegate to the playlist extractor."""
        favourites_page = self._download_webpage(
            'https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        playlist_id = self._search_regex(
            r'list=(.+?)["&]', favourites_page, 'favourites playlist id')
        return self.url_result(playlist_id, 'YoutubePlaylist')
2327
2328
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    # Feed extractor configuration for the 'recommended' feed.
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = 'Youtube Recommended videos'
    _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?'
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
2334
2335
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    # Feed extractor configuration for the 'subscriptions' feed.
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = 'Youtube Subscriptions'
    _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
    IE_DESC = 'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
2341
2342
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    # Feed extractor configuration for the 'history' feed.
    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
    # Raw string, like the sibling feed extractors: '\.' is a regex escape,
    # not a valid Python string escape sequence.
    _VALID_URL = r'https?://www\.youtube\.com/feed/history|:ythistory'
    _FEED_NAME = 'history'
    _PLAYLIST_TITLE = 'Youtube History'
2348
2349
class YoutubeTruncatedURLIE(InfoExtractor):
    # Matches watch/attribution_link URLs that carry no video id and answers
    # them with a hint about quoting the URL.
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'http://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'http://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError explaining how to quote the URL."""
        hint = (
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like  youtube-dl '
            '"http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply  youtube-dl BaW_jenozKc  .')
        raise ExtractorError(hint, expected=True)
2397
2398
class YoutubeTruncatedIDIE(InfoExtractor):
    # Matches watch URLs whose video id is only 1 to 10 characters long.
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the incomplete id and the URL."""
        truncated_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (truncated_id, url)
        raise ExtractorError(message, expected=True)