_ Git - youtube-dl/blob - youtube_dl/extractor/youtube.py

   1 # coding: utf-8
   2
   3 from __future__ import unicode_literals
   4
   5
   6 import itertools
   7 import json
   8 import os.path
   9 import re
  10 import time
  11 import traceback
  12
  13 from .common import InfoExtractor, SearchInfoExtractor
  14 from .subtitles import SubtitlesInfoExtractor
  15 from ..jsinterp import JSInterpreter
  16 from ..swfinterp import SWFInterpreter
  17 from ..compat import (
  18     compat_chr,
  19     compat_parse_qs,
  20     compat_urllib_parse,
  21     compat_urllib_request,
  22     compat_urlparse,
  23     compat_str,
  24 )
  25 from ..utils import (
  26     clean_html,
  27     ExtractorError,
  28     float_or_none,
  29     get_element_by_attribute,
  30     get_element_by_id,
  31     int_or_none,
  32     OnDemandPagedList,
  33     orderedSet,
  34     unescapeHTML,
  35     unified_strdate,
  36     uppercase_escape,
  37 )
  38
  39
  40 class YoutubeBaseInfoExtractor(InfoExtractor):
  41     """Provide base functions for Youtube extractors"""
  42     _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
  43     _TWOFACTOR_URL = 'https://accounts.google.com/SecondFactor'
  44     _NETRC_MACHINE = 'youtube'
  45     # If True it will raise an error if no login info is provided
  46     _LOGIN_REQUIRED = False
  47
  48     def _set_language(self):
  49         self._set_cookie(
  50             '.youtube.com', 'PREF', 'f1=50000000&hl=en',
  51             # YouTube sets the expire time to about two months
  52             expire_time=time.time() + 2 * 30 * 24 * 3600)
  53
  54     def _login(self):
  55         """
  56         Attempt to log in to YouTube.
  57         True is returned if successful or skipped.
  58         False is returned if login failed.
  59
  60         If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
  61         """
  62         (username, password) = self._get_login_info()
  63         # No authentication to be performed
  64         if username is None:
  65             if self._LOGIN_REQUIRED:
  66                 raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
  67             return True
  68
  69         login_page = self._download_webpage(
  70             self._LOGIN_URL, None,
  71             note='Downloading login page',
  72             errnote='unable to fetch login page', fatal=False)
  73         if login_page is False:
  74             return
  75
  76         galx = self._search_regex(r'(?s)<input.+?name="GALX".+?value="(.+?)"',
  77                                   login_page, 'Login GALX parameter')
  78
  79         # Log in
  80         login_form_strs = {
  81             'continue': 'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
  82             'Email': username,
  83             'GALX': galx,
  84             'Passwd': password,
  85
  86             'PersistentCookie': 'yes',
  87             '_utf8': '霱',
  88             'bgresponse': 'js_disabled',
  89             'checkConnection': '',
  90             'checkedDomains': 'youtube',
  91             'dnConn': '',
  92             'pstMsg': '0',
  93             'rmShown': '1',
  94             'secTok': '',
  95             'signIn': 'Sign in',
  96             'timeStmp': '',
  97             'service': 'youtube',
  98             'uilel': '3',
  99             'hl': 'en_US',
 100         }
 101
 102         # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
 103         # chokes on unicode
 104         login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k, v in login_form_strs.items())
 105         login_data = compat_urllib_parse.urlencode(login_form).encode('ascii')
 106
 107         req = compat_urllib_request.Request(self._LOGIN_URL, login_data)
 108         login_results = self._download_webpage(
 109             req, None,
 110             note='Logging in', errnote='unable to log in', fatal=False)
 111         if login_results is False:
 112             return False
 113
 114         if re.search(r'id="errormsg_0_Passwd"', login_results) is not None:
 115             raise ExtractorError('Please use your account password and a two-factor code instead of an application-specific password.', expected=True)
 116
 117         # Two-Factor
 118         # TODO add SMS and phone call support - these require making a request and then prompting the user
 119
 120         if re.search(r'(?i)<form[^>]* id="gaia_secondfactorform"', login_results) is not None:
 121             tfa_code = self._get_tfa_info()
 122
 123             if tfa_code is None:
 124                 self._downloader.report_warning('Two-factor authentication required. Provide it with --twofactor <code>')
 125                 self._downloader.report_warning('(Note that only TOTP (Google Authenticator App) codes work at this time.)')
 126                 return False
 127
 128             # Unlike the first login form, secTok and timeStmp are both required for the TFA form
 129
 130             match = re.search(r'id="secTok"\n\s+value=\'(.+)\'/>', login_results, re.M | re.U)
 131             if match is None:
 132                 self._downloader.report_warning('Failed to get secTok - did the page structure change?')
 133             secTok = match.group(1)
 134             match = re.search(r'id="timeStmp"\n\s+value=\'(.+)\'/>', login_results, re.M | re.U)
 135             if match is None:
 136                 self._downloader.report_warning('Failed to get timeStmp - did the page structure change?')
 137             timeStmp = match.group(1)
 138
 139             tfa_form_strs = {
 140                 'continue': 'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
 141                 'smsToken': '',
 142                 'smsUserPin': tfa_code,
 143                 'smsVerifyPin': 'Verify',
 144
 145                 'PersistentCookie': 'yes',
 146                 'checkConnection': '',
 147                 'checkedDomains': 'youtube',
 148                 'pstMsg': '1',
 149                 'secTok': secTok,
 150                 'timeStmp': timeStmp,
 151                 'service': 'youtube',
 152                 'hl': 'en_US',
 153             }
 154             tfa_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k, v in tfa_form_strs.items())
 155             tfa_data = compat_urllib_parse.urlencode(tfa_form).encode('ascii')
 156
 157             tfa_req = compat_urllib_request.Request(self._TWOFACTOR_URL, tfa_data)
 158             tfa_results = self._download_webpage(
 159                 tfa_req, None,
 160                 note='Submitting TFA code', errnote='unable to submit tfa', fatal=False)
 161
 162             if tfa_results is False:
 163                 return False
 164
 165             if re.search(r'(?i)<form[^>]* id="gaia_secondfactorform"', tfa_results) is not None:
 166                 self._downloader.report_warning('Two-factor code expired. Please try again, or use a one-use backup code instead.')
 167                 return False
 168             if re.search(r'(?i)<form[^>]* id="gaia_loginform"', tfa_results) is not None:
 169                 self._downloader.report_warning('unable to log in - did the page structure change?')
 170                 return False
 171             if re.search(r'smsauth-interstitial-reviewsettings', tfa_results) is not None:
 172                 self._downloader.report_warning('Your Google account has a security notice. Please log in on your web browser, resolve the notice, and try again.')
 173                 return False
 174
 175         if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
 176             self._downloader.report_warning('unable to log in: bad username or password')
 177             return False
 178         return True
 179
 180     def _real_initialize(self):
 181         if self._downloader is None:
 182             return
 183         self._set_language()
 184         if not self._login():
 185             return
 186
 187
 188 class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
 189     IE_DESC = 'YouTube.com'
 190     _VALID_URL = r"""(?x)^
 191                      (
 192                          (?:https?://|//)                                    # http(s):// or protocol-independent URL
 193                          (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|
 194                             (?:www\.)?deturl\.com/www\.youtube\.com/|
 195                             (?:www\.)?pwnyoutube\.com/|
 196                             (?:www\.)?yourepeat\.com/|
 197                             tube\.majestyc\.net/|
 198                             youtube\.googleapis\.com/)                        # the various hostnames, with wildcard subdomains
 199                          (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
 200                          (?:                                                  # the various things that can precede the ID:
 201                              (?:(?:v|embed|e)/(?!videoseries))                # v/ or embed/ or e/
 202                              |(?:                                             # or the v= param in all its forms
 203                                  (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)?  # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
 204                                  (?:\?|\#!?)                                  # the params delimiter ? or # or #!
 205                                  (?:.*?&)?                                    # any other preceding param (like /?s=tuff&v=xxxx)
 206                                  v=
 207                              )
 208                          ))
 209                          |youtu\.be/                                          # just youtu.be/xxxx
 210                          |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
 211                          )
 212                      )?                                                       # all until now is optional -> you can pass the naked ID
 213                      ([0-9A-Za-z_-]{11})                                      # here is it! the YouTube video ID
 214                      (?!.*?&list=)                                            # combined list/video URLs are handled by the playlist IE
 215                      (?(1).+)?                                                # if we found the ID, everything can follow
 216                      $"""
 217     _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
 218     _formats = {
 219         '5': {'ext': 'flv', 'width': 400, 'height': 240},
 220         '6': {'ext': 'flv', 'width': 450, 'height': 270},
 221         '13': {'ext': '3gp'},
 222         '17': {'ext': '3gp', 'width': 176, 'height': 144},
 223         '18': {'ext': 'mp4', 'width': 640, 'height': 360},
 224         '22': {'ext': 'mp4', 'width': 1280, 'height': 720},
 225         '34': {'ext': 'flv', 'width': 640, 'height': 360},
 226         '35': {'ext': 'flv', 'width': 854, 'height': 480},
 227         '36': {'ext': '3gp', 'width': 320, 'height': 240},
 228         '37': {'ext': 'mp4', 'width': 1920, 'height': 1080},
 229         '38': {'ext': 'mp4', 'width': 4096, 'height': 3072},
 230         '43': {'ext': 'webm', 'width': 640, 'height': 360},
 231         '44': {'ext': 'webm', 'width': 854, 'height': 480},
 232         '45': {'ext': 'webm', 'width': 1280, 'height': 720},
 233         '46': {'ext': 'webm', 'width': 1920, 'height': 1080},
 234
 235
 236         # 3d videos
 237         '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'preference': -20},
 238         '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'preference': -20},
 239         '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'preference': -20},
 240         '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'preference': -20},
 241         '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'preference': -20},
 242         '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'preference': -20},
 243         '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'preference': -20},
 244
 245         # Apple HTTP Live Streaming
 246         '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'preference': -10},
 247         '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'preference': -10},
 248         '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'preference': -10},
 249         '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'preference': -10},
 250         '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'preference': -10},
 251         '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'preference': -10},
 252         '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'preference': -10},
 253
 254         # DASH mp4 video
 255         '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
 256         '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
 257         '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
 258         '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
 259         '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
 260         '138': {'ext': 'mp4', 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},  # Height can vary (https://github.com/rg3/youtube-dl/issues/4559)
 261         '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
 262         '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
 263         '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'h264'},
 264         '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'h264'},
 265         '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'vcodec': 'h264'},
 266
 267         # Dash mp4 audio
 268         '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 48, 'preference': -50, 'container': 'm4a_dash'},
 269         '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 128, 'preference': -50, 'container': 'm4a_dash'},
 270         '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 256, 'preference': -50, 'container': 'm4a_dash'},
 271
 272         # Dash webm
 273         '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
 274         '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
 275         '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
 276         '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
 277         '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
 278         '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
 279         '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'container': 'webm', 'vcodec': 'VP9'},
 280         '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
 281         '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
 282         '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
 283         '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
 284         '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
 285         '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
 286         '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
 287         '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
 288         '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
 289         '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'VP9'},
 290         '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'VP9'},
 291         '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'VP9'},
 292         '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'vcodec': 'VP9'},
 293         '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'VP9'},
 294
 295         # Dash webm audio
 296         '171': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 128, 'preference': -50},
 297         '172': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 256, 'preference': -50},
 298
 299         # Dash webm audio with opus inside
 300         '249': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50, 'preference': -50},
 301         '250': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70, 'preference': -50},
 302         '251': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160, 'preference': -50},
 303
 304         # RTMP (unnamed)
 305         '_rtmp': {'protocol': 'rtmp'},
 306     }
 307
 308     IE_NAME = 'youtube'
 309     _TESTS = [
 310         {
 311             'url': 'http://www.youtube.com/watch?v=BaW_jenozKc',
 312             'info_dict': {
 313                 'id': 'BaW_jenozKc',
 314                 'ext': 'mp4',
 315                 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
 316                 'uploader': 'Philipp Hagemeister',
 317                 'uploader_id': 'phihag',
 318                 'upload_date': '20121002',
 319                 'description': 'test chars:  "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
 320                 'categories': ['Science & Technology'],
 321                 'like_count': int,
 322                 'dislike_count': int,
 323             }
 324         },
 325         {
 326             'url': 'http://www.youtube.com/watch?v=UxxajLWwzqY',
 327             'note': 'Test generic use_cipher_signature video (#897)',
 328             'info_dict': {
 329                 'id': 'UxxajLWwzqY',
 330                 'ext': 'mp4',
 331                 'upload_date': '20120506',
 332                 'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
 333                 'description': 'md5:fea86fda2d5a5784273df5c7cc994d9f',
 334                 'uploader': 'Icona Pop',
 335                 'uploader_id': 'IconaPop',
 336             }
 337         },
 338         {
 339             'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ',
 340             'note': 'Test VEVO video with age protection (#956)',
 341             'info_dict': {
 342                 'id': '07FYdnEawAQ',
 343                 'ext': 'mp4',
 344                 'upload_date': '20130703',
 345                 'title': 'Justin Timberlake - Tunnel Vision (Explicit)',
 346                 'description': 'md5:64249768eec3bc4276236606ea996373',
 347                 'uploader': 'justintimberlakeVEVO',
 348                 'uploader_id': 'justintimberlakeVEVO',
 349             }
 350         },
 351         {
 352             'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
 353             'note': 'Embed-only video (#1746)',
 354             'info_dict': {
 355                 'id': 'yZIXLfi8CZQ',
 356                 'ext': 'mp4',
 357                 'upload_date': '20120608',
 358                 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
 359                 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
 360                 'uploader': 'SET India',
 361                 'uploader_id': 'setindia'
 362             }
 363         },
 364         {
 365             'url': 'http://www.youtube.com/watch?v=a9LDPn-MO4I',
 366             'note': '256k DASH audio (format 141) via DASH manifest',
 367             'info_dict': {
 368                 'id': 'a9LDPn-MO4I',
 369                 'ext': 'm4a',
 370                 'upload_date': '20121002',
 371                 'uploader_id': '8KVIDEO',
 372                 'description': '',
 373                 'uploader': '8KVIDEO',
 374                 'title': 'UHDTV TEST 8K VIDEO.mp4'
 375             },
 376             'params': {
 377                 'youtube_include_dash_manifest': True,
 378                 'format': '141',
 379             },
 380         },
 381         # DASH manifest with encrypted signature
 382         {
 383             'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
 384             'info_dict': {
 385                 'id': 'IB3lcPjvWLA',
 386                 'ext': 'm4a',
 387                 'title': 'Afrojack, Spree Wilson - The Spark ft. Spree Wilson',
 388                 'description': 'md5:12e7067fa6735a77bdcbb58cb1187d2d',
 389                 'uploader': 'AfrojackVEVO',
 390                 'uploader_id': 'AfrojackVEVO',
 391                 'upload_date': '20131011',
 392             },
 393             'params': {
 394                 'youtube_include_dash_manifest': True,
 395                 'format': '141',
 396             },
 397         },
 398         # JS player signature function name containing $
 399         {
 400             'url': 'https://www.youtube.com/watch?v=nfWlot6h_JM',
 401             'info_dict': {
 402                 'id': 'nfWlot6h_JM',
 403                 'ext': 'm4a',
 404                 'title': 'Taylor Swift - Shake It Off',
 405                 'description': 'md5:2acfda1b285bdd478ccec22f9918199d',
 406                 'uploader': 'TaylorSwiftVEVO',
 407                 'uploader_id': 'TaylorSwiftVEVO',
 408                 'upload_date': '20140818',
 409             },
 410             'params': {
 411                 'youtube_include_dash_manifest': True,
 412                 'format': '141',
 413             },
 414         },
 415         # Controversy video
 416         {
 417             'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
 418             'info_dict': {
 419                 'id': 'T4XJQO3qol8',
 420                 'ext': 'mp4',
 421                 'upload_date': '20100909',
 422                 'uploader': 'The Amazing Atheist',
 423                 'uploader_id': 'TheAmazingAtheist',
 424                 'title': 'Burning Everyone\'s Koran',
 425                 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
 426             }
 427         },
 428         # Normal age-gate video (No vevo, embed allowed)
 429         {
 430             'url': 'http://youtube.com/watch?v=HtVdAasjOgU',
 431             'info_dict': {
 432                 'id': 'HtVdAasjOgU',
 433                 'ext': 'mp4',
 434                 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
 435                 'description': 're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
 436                 'uploader': 'The Witcher',
 437                 'uploader_id': 'WitcherGame',
 438                 'upload_date': '20140605',
 439             },
 440         },
 441         # Age-gate video with encrypted signature
 442         {
 443             'url': 'http://www.youtube.com/watch?v=6kLq3WMV1nU',
 444             'info_dict': {
 445                 'id': '6kLq3WMV1nU',
 446                 'ext': 'mp4',
 447                 'title': 'Dedication To My Ex (Miss That) (Lyric Video)',
 448                 'description': 'md5:33765bb339e1b47e7e72b5490139bb41',
 449                 'uploader': 'LloydVEVO',
 450                 'uploader_id': 'LloydVEVO',
 451                 'upload_date': '20110629',
 452             },
 453         },
 454         # video_info is None (https://github.com/rg3/youtube-dl/issues/4421)
 455         {
 456             'url': '__2ABJjxzNo',
 457             'info_dict': {
 458                 'id': '__2ABJjxzNo',
 459                 'ext': 'mp4',
 460                 'upload_date': '20100430',
 461                 'uploader_id': 'deadmau5',
 462                 'description': 'md5:12c56784b8032162bb936a5f76d55360',
 463                 'uploader': 'deadmau5',
 464                 'title': 'Deadmau5 - Some Chords (HD)',
 465             },
 466             'expected_warnings': [
 467                 'DASH manifest missing',
 468             ]
 469         },
 470         # Olympics (https://github.com/rg3/youtube-dl/issues/4431)
 471         {
 472             'url': 'lqQg6PlCWgI',
 473             'info_dict': {
 474                 'id': 'lqQg6PlCWgI',
 475                 'ext': 'mp4',
 476                 'upload_date': '20120731',
 477                 'uploader_id': 'olympic',
 478                 'description': 'HO09  - Women -  GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
 479                 'uploader': 'Olympics',
 480                 'title': 'Hockey - Women -  GER-AUS - London 2012 Olympic Games',
 481             },
 482             'params': {
 483                 'skip_download': 'requires avconv',
 484             }
 485         },
 486         # Non-square pixels
 487         {
 488             'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
 489             'info_dict': {
 490                 'id': '_b-2C3KPAM0',
 491                 'ext': 'mp4',
 492                 'stretched_ratio': 16 / 9.,
 493                 'upload_date': '20110310',
 494                 'uploader_id': 'AllenMeow',
 495                 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
 496                 'uploader': '孫艾倫',
 497                 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
 498             },
 499         }
 500     ]
 501
 502     def __init__(self, *args, **kwargs):
 503         super(YoutubeIE, self).__init__(*args, **kwargs)
 504         self._player_cache = {}
 505
 506     def report_video_info_webpage_download(self, video_id):
 507         """Report attempt to download video info webpage."""
 508         self.to_screen('%s: Downloading video info webpage' % video_id)
 509
 510     def report_information_extraction(self, video_id):
 511         """Report attempt to extract video information."""
 512         self.to_screen('%s: Extracting video information' % video_id)
 513
 514     def report_unavailable_format(self, video_id, format):
 515         """Report extracted video URL."""
 516         self.to_screen('%s: Format %s not available' % (video_id, format))
 517
 518     def report_rtmp_download(self):
 519         """Indicate the download will use the RTMP protocol."""
 520         self.to_screen('RTMP download detected')
 521
 522     def _signature_cache_id(self, example_sig):
 523         """ Return a string representation of a signature """
 524         return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
 525
 526     def _extract_signature_function(self, video_id, player_url, example_sig):
 527         id_m = re.match(
 528             r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player)?\.(?P<ext>[a-z]+)$',
 529             player_url)
 530         if not id_m:
 531             raise ExtractorError('Cannot identify player %r' % player_url)
 532         player_type = id_m.group('ext')
 533         player_id = id_m.group('id')
 534
 535         # Read from filesystem cache
 536         func_id = '%s_%s_%s' % (
 537             player_type, player_id, self._signature_cache_id(example_sig))
 538         assert os.path.basename(func_id) == func_id
 539
 540         cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
 541         if cache_spec is not None:
 542             return lambda s: ''.join(s[i] for i in cache_spec)
 543
 544         download_note = (
 545             'Downloading player %s' % player_url
 546             if self._downloader.params.get('verbose') else
 547             'Downloading %s player %s' % (player_type, player_id)
 548         )
 549         if player_type == 'js':
 550             code = self._download_webpage(
 551                 player_url, video_id,
 552                 note=download_note,
 553                 errnote='Download of %s failed' % player_url)
 554             res = self._parse_sig_js(code)
 555         elif player_type == 'swf':
 556             urlh = self._request_webpage(
 557                 player_url, video_id,
 558                 note=download_note,
 559                 errnote='Download of %s failed' % player_url)
 560             code = urlh.read()
 561             res = self._parse_sig_swf(code)
 562         else:
 563             assert False, 'Invalid player type %r' % player_type
 564
 565         if cache_spec is None:
 566             test_string = ''.join(map(compat_chr, range(len(example_sig))))
 567             cache_res = res(test_string)
 568             cache_spec = [ord(c) for c in cache_res]
 569
 570         self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
 571         return res
 572
 573     def _print_sig_code(self, func, example_sig):
 574         def gen_sig_code(idxs):
 575             def _genslice(start, end, step):
 576                 starts = '' if start == 0 else str(start)
 577                 ends = (':%d' % (end + step)) if end + step >= 0 else ':'
 578                 steps = '' if step == 1 else (':%d' % step)
 579                 return 's[%s%s%s]' % (starts, ends, steps)
 580
 581             step = None
 582             # Quelch pyflakes warnings - start will be set when step is set
 583             start = '(Never used)'
 584             for i, prev in zip(idxs[1:], idxs[:-1]):
 585                 if step is not None:
 586                     if i - prev == step:
 587                         continue
 588                     yield _genslice(start, prev, step)
 589                     step = None
 590                     continue
 591                 if i - prev in [-1, 1]:
 592                     step = i - prev
 593                     start = prev
 594                     continue
 595                 else:
 596                     yield 's[%d]' % prev
 597             if step is None:
 598                 yield 's[%d]' % i
 599             else:
 600                 yield _genslice(start, i, step)
 601
 602         test_string = ''.join(map(compat_chr, range(len(example_sig))))
 603         cache_res = func(test_string)
 604         cache_spec = [ord(c) for c in cache_res]
 605         expr_code = ' + '.join(gen_sig_code(cache_spec))
 606         signature_id_tuple = '(%s)' % (
 607             ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
 608         code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
 609                 '    return %s\n') % (signature_id_tuple, expr_code)
 610         self.to_screen('Extracted signature function:\n' + code)
 611
 612     def _parse_sig_js(self, jscode):
 613         funcname = self._search_regex(
 614             r'\.sig\|\|([a-zA-Z0-9$]+)\(', jscode,
 615             'Initial JS player signature function name')
 616
 617         jsi = JSInterpreter(jscode)
 618         initial_function = jsi.extract_function(funcname)
 619         return lambda s: initial_function([s])
 620
 621     def _parse_sig_swf(self, file_contents):
 622         swfi = SWFInterpreter(file_contents)
 623         TARGET_CLASSNAME = 'SignatureDecipher'
 624         searched_class = swfi.extract_class(TARGET_CLASSNAME)
 625         initial_function = swfi.extract_function(searched_class, 'decipher')
 626         return lambda s: initial_function([s])
 627
 628     def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
 629         """Turn the encrypted s field into a working signature"""
 630
 631         if player_url is None:
 632             raise ExtractorError('Cannot decrypt signature without player_url')
 633
 634         if player_url.startswith('//'):
 635             player_url = 'https:' + player_url
 636         try:
 637             player_id = (player_url, self._signature_cache_id(s))
 638             if player_id not in self._player_cache:
 639                 func = self._extract_signature_function(
 640                     video_id, player_url, s
 641                 )
 642                 self._player_cache[player_id] = func
 643             func = self._player_cache[player_id]
 644             if self._downloader.params.get('youtube_print_sig_code'):
 645                 self._print_sig_code(func, s)
 646             return func(s)
 647         except Exception as e:
 648             tb = traceback.format_exc()
 649             raise ExtractorError(
 650                 'Signature extraction failed: ' + tb, cause=e)
 651
 652     def _get_available_subtitles(self, video_id, webpage):
 653         try:
 654             subs_doc = self._download_xml(
 655                 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
 656                 video_id, note=False)
 657         except ExtractorError as err:
 658             self._downloader.report_warning('unable to download video subtitles: %s' % compat_str(err))
 659             return {}
 660
 661         sub_lang_list = {}
 662         for track in subs_doc.findall('track'):
 663             lang = track.attrib['lang_code']
 664             if lang in sub_lang_list:
 665                 continue
 666             params = compat_urllib_parse.urlencode({
 667                 'lang': lang,
 668                 'v': video_id,
 669                 'fmt': self._downloader.params.get('subtitlesformat', 'srt'),
 670                 'name': track.attrib['name'].encode('utf-8'),
 671             })
 672             url = 'https://www.youtube.com/api/timedtext?' + params
 673             sub_lang_list[lang] = url
 674         if not sub_lang_list:
 675             self._downloader.report_warning('video doesn\'t have subtitles')
 676             return {}
 677         return sub_lang_list
 678
 679     def _get_available_automatic_caption(self, video_id, webpage):
 680         """We need the webpage for getting the captions url, pass it as an
 681            argument to speed up the process."""
 682         sub_format = self._downloader.params.get('subtitlesformat', 'srt')
 683         self.to_screen('%s: Looking for automatic captions' % video_id)
 684         mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
 685         err_msg = 'Couldn\'t find automatic captions for %s' % video_id
 686         if mobj is None:
 687             self._downloader.report_warning(err_msg)
 688             return {}
 689         player_config = json.loads(mobj.group(1))
 690         try:
 691             args = player_config['args']
 692             caption_url = args['ttsurl']
 693             timestamp = args['timestamp']
 694             # We get the available subtitles
 695             list_params = compat_urllib_parse.urlencode({
 696                 'type': 'list',
 697                 'tlangs': 1,
 698                 'asrs': 1,
 699             })
 700             list_url = caption_url + '&' + list_params
 701             caption_list = self._download_xml(list_url, video_id)
 702             original_lang_node = caption_list.find('track')
 703             if original_lang_node is None:
 704                 self._downloader.report_warning('Video doesn\'t have automatic captions')
 705                 return {}
 706             original_lang = original_lang_node.attrib['lang_code']
 707             caption_kind = original_lang_node.attrib.get('kind', '')
 708
 709             sub_lang_list = {}
 710             for lang_node in caption_list.findall('target'):
 711                 sub_lang = lang_node.attrib['lang_code']
 712                 params = compat_urllib_parse.urlencode({
 713                     'lang': original_lang,
 714                     'tlang': sub_lang,
 715                     'fmt': sub_format,
 716                     'ts': timestamp,
 717                     'kind': caption_kind,
 718                 })
 719                 sub_lang_list[sub_lang] = caption_url + '&' + params
 720             return sub_lang_list
 721         # An extractor error can be raise by the download process if there are
 722         # no automatic captions but there are subtitles
 723         except (KeyError, ExtractorError):
 724             self._downloader.report_warning(err_msg)
 725             return {}
 726
 727     @classmethod
 728     def extract_id(cls, url):
 729         mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
 730         if mobj is None:
 731             raise ExtractorError('Invalid URL: %s' % url)
 732         video_id = mobj.group(2)
 733         return video_id
 734
 735     def _extract_from_m3u8(self, manifest_url, video_id):
 736         url_map = {}
 737
 738         def _get_urls(_manifest):
 739             lines = _manifest.split('\n')
 740             urls = filter(lambda l: l and not l.startswith('#'),
 741                           lines)
 742             return urls
 743         manifest = self._download_webpage(manifest_url, video_id, 'Downloading formats manifest')
 744         formats_urls = _get_urls(manifest)
 745         for format_url in formats_urls:
 746             itag = self._search_regex(r'itag/(\d+?)/', format_url, 'itag')
 747             url_map[itag] = format_url
 748         return url_map
 749
 750     def _extract_annotations(self, video_id):
 751         url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id
 752         return self._download_webpage(url, video_id, note='Searching for annotations.', errnote='Unable to download video annotations.')
 753
 754     def _parse_dash_manifest(
 755             self, video_id, dash_manifest_url, player_url, age_gate):
 756         def decrypt_sig(mobj):
 757             s = mobj.group(1)
 758             dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
 759             return '/signature/%s' % dec_s
 760         dash_manifest_url = re.sub(r'/s/([\w\.]+)', decrypt_sig, dash_manifest_url)
 761         dash_doc = self._download_xml(
 762             dash_manifest_url, video_id,
 763             note='Downloading DASH manifest',
 764             errnote='Could not download DASH manifest')
 765
 766         formats = []
 767         for r in dash_doc.findall('.//{urn:mpeg:DASH:schema:MPD:2011}Representation'):
 768             url_el = r.find('{urn:mpeg:DASH:schema:MPD:2011}BaseURL')
 769             if url_el is None:
 770                 continue
 771             format_id = r.attrib['id']
 772             video_url = url_el.text
 773             filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength'))
 774             f = {
 775                 'format_id': format_id,
 776                 'url': video_url,
 777                 'width': int_or_none(r.attrib.get('width')),
 778                 'height': int_or_none(r.attrib.get('height')),
 779                 'tbr': int_or_none(r.attrib.get('bandwidth'), 1000),
 780                 'asr': int_or_none(r.attrib.get('audioSamplingRate')),
 781                 'filesize': filesize,
 782                 'fps': int_or_none(r.attrib.get('frameRate')),
 783             }
 784             try:
 785                 existing_format = next(
 786                     fo for fo in formats
 787                     if fo['format_id'] == format_id)
 788             except StopIteration:
 789                 full_info = self._formats.get(format_id, {}).copy()
 790                 full_info.update(f)
 791                 formats.append(full_info)
 792             else:
 793                 existing_format.update(f)
 794         return formats
 795
 796     def _real_extract(self, url):
 797         proto = (
 798             'http' if self._downloader.params.get('prefer_insecure', False)
 799             else 'https')
 800
 801         # Extract original video URL from URL with redirection, like age verification, using next_url parameter
 802         mobj = re.search(self._NEXT_URL_RE, url)
 803         if mobj:
 804             url = proto + '://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/')
 805         video_id = self.extract_id(url)
 806
 807         # Get video webpage
 808         url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
 809         video_webpage = self._download_webpage(url, video_id)
 810
 811         # Attempt to extract SWF player URL
 812         mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
 813         if mobj is not None:
 814             player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
 815         else:
 816             player_url = None
 817
 818         # Get video info
 819         embed_webpage = None
 820         if re.search(r'player-age-gate-content">', video_webpage) is not None:
 821             age_gate = True
 822             # We simulate the access to the video from www.youtube.com/v/{video_id}
 823             # this can be viewed without login into Youtube
 824             url = proto + '://www.youtube.com/embed/%s' % video_id
 825             embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
 826             data = compat_urllib_parse.urlencode({
 827                 'video_id': video_id,
 828                 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
 829                 'sts': self._search_regex(
 830                     r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
 831             })
 832             video_info_url = proto + '://www.youtube.com/get_video_info?' + data
 833             video_info_webpage = self._download_webpage(
 834                 video_info_url, video_id,
 835                 note='Refetching age-gated info webpage',
 836                 errnote='unable to download video info webpage')
 837             video_info = compat_parse_qs(video_info_webpage)
 838         else:
 839             age_gate = False
 840             try:
 841                 # Try looking directly into the video webpage
 842                 mobj = re.search(r';ytplayer\.config\s*=\s*({.*?});', video_webpage)
 843                 if not mobj:
 844                     raise ValueError('Could not find ytplayer.config')  # caught below
 845                 json_code = uppercase_escape(mobj.group(1))
 846                 ytplayer_config = json.loads(json_code)
 847                 args = ytplayer_config['args']
 848                 # Convert to the same format returned by compat_parse_qs
 849                 video_info = dict((k, [v]) for k, v in args.items())
 850                 if 'url_encoded_fmt_stream_map' not in args:
 851                     raise ValueError('No stream_map present')  # caught below
 852             except ValueError:
 853                 # We fallback to the get_video_info pages (used by the embed page)
 854                 self.report_video_info_webpage_download(video_id)
 855                 for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
 856                     video_info_url = (
 857                         '%s://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
 858                         % (proto, video_id, el_type))
 859                     video_info_webpage = self._download_webpage(
 860                         video_info_url,
 861                         video_id, note=False,
 862                         errnote='unable to download video info webpage')
 863                     video_info = compat_parse_qs(video_info_webpage)
 864                     if 'token' in video_info:
 865                         break
 866         if 'token' not in video_info:
 867             if 'reason' in video_info:
 868                 raise ExtractorError(
 869                     'YouTube said: %s' % video_info['reason'][0],
 870                     expected=True, video_id=video_id)
 871             else:
 872                 raise ExtractorError(
 873                     '"token" parameter not in video info for unknown reason',
 874                     video_id=video_id)
 875
 876         if 'view_count' in video_info:
 877             view_count = int(video_info['view_count'][0])
 878         else:
 879             view_count = None
 880
 881         # Check for "rental" videos
 882         if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
 883             raise ExtractorError('"rental" videos not supported')
 884
 885         # Start extracting information
 886         self.report_information_extraction(video_id)
 887
 888         # uploader
 889         if 'author' not in video_info:
 890             raise ExtractorError('Unable to extract uploader name')
 891         video_uploader = compat_urllib_parse.unquote_plus(video_info['author'][0])
 892
 893         # uploader_id
 894         video_uploader_id = None
 895         mobj = re.search(r'<link itemprop="url" href="http://www.youtube.com/(?:user|channel)/([^"]+)">', video_webpage)
 896         if mobj is not None:
 897             video_uploader_id = mobj.group(1)
 898         else:
 899             self._downloader.report_warning('unable to extract uploader nickname')
 900
 901         # title
 902         if 'title' in video_info:
 903             video_title = video_info['title'][0]
 904         else:
 905             self._downloader.report_warning('Unable to extract video title')
 906             video_title = '_'
 907
 908         # thumbnail image
 909         # We try first to get a high quality image:
 910         m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
 911                             video_webpage, re.DOTALL)
 912         if m_thumb is not None:
 913             video_thumbnail = m_thumb.group(1)
 914         elif 'thumbnail_url' not in video_info:
 915             self._downloader.report_warning('unable to extract video thumbnail')
 916             video_thumbnail = None
 917         else:   # don't panic if we can't find it
 918             video_thumbnail = compat_urllib_parse.unquote_plus(video_info['thumbnail_url'][0])
 919
 920         # upload date
 921         upload_date = None
 922         mobj = re.search(r'(?s)id="eow-date.*?>(.*?)</span>', video_webpage)
 923         if mobj is None:
 924             mobj = re.search(
 925                 r'(?s)id="watch-uploader-info".*?>.*?(?:Published|Uploaded|Streamed live) on (.*?)</strong>',
 926                 video_webpage)
 927         if mobj is not None:
 928             upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
 929             upload_date = unified_strdate(upload_date)
 930
 931         m_cat_container = self._search_regex(
 932             r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
 933             video_webpage, 'categories', default=None)
 934         if m_cat_container:
 935             category = self._html_search_regex(
 936                 r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
 937                 default=None)
 938             video_categories = None if category is None else [category]
 939         else:
 940             video_categories = None
 941
 942         # description
 943         video_description = get_element_by_id("eow-description", video_webpage)
 944         if video_description:
 945             video_description = re.sub(r'''(?x)
 946                 <a\s+
 947                     (?:[a-zA-Z-]+="[^"]+"\s+)*?
 948                     title="([^"]+)"\s+
 949                     (?:[a-zA-Z-]+="[^"]+"\s+)*?
 950                     class="yt-uix-redirect-link"\s*>
 951                 [^<]+
 952                 </a>
 953             ''', r'\1', video_description)
 954             video_description = clean_html(video_description)
 955         else:
 956             fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
 957             if fd_mobj:
 958                 video_description = unescapeHTML(fd_mobj.group(1))
 959             else:
 960                 video_description = ''
 961
 962         def _extract_count(count_name):
 963             count = self._search_regex(
 964                 r'id="watch-%s"[^>]*>.*?([\d,]+)\s*</span>' % re.escape(count_name),
 965                 video_webpage, count_name, default=None)
 966             if count is not None:
 967                 return int(count.replace(',', ''))
 968             return None
 969         like_count = _extract_count('like')
 970         dislike_count = _extract_count('dislike')
 971
 972         # subtitles
 973         video_subtitles = self.extract_subtitles(video_id, video_webpage)
 974
 975         if self._downloader.params.get('listsubtitles', False):
 976             self._list_available_subtitles(video_id, video_webpage)
 977             return
 978
 979         if 'length_seconds' not in video_info:
 980             self._downloader.report_warning('unable to extract video duration')
 981             video_duration = None
 982         else:
 983             video_duration = int(compat_urllib_parse.unquote_plus(video_info['length_seconds'][0]))
 984
 985         # annotations
 986         video_annotations = None
 987         if self._downloader.params.get('writeannotations', False):
 988             video_annotations = self._extract_annotations(video_id)
 989
 990         def _map_to_format_list(urlmap):
 991             formats = []
 992             for itag, video_real_url in urlmap.items():
 993                 dct = {
 994                     'format_id': itag,
 995                     'url': video_real_url,
 996                     'player_url': player_url,
 997                 }
 998                 if itag in self._formats:
 999                     dct.update(self._formats[itag])
1000                 formats.append(dct)
1001             return formats
1002
1003         if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1004             self.report_rtmp_download()
1005             formats = [{
1006                 'format_id': '_rtmp',
1007                 'protocol': 'rtmp',
1008                 'url': video_info['conn'][0],
1009                 'player_url': player_url,
1010             }]
1011         elif len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1:
1012             encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
1013             if 'rtmpe%3Dyes' in encoded_url_map:
1014                 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
1015             url_map = {}
1016             for url_data_str in encoded_url_map.split(','):
1017                 url_data = compat_parse_qs(url_data_str)
1018                 if 'itag' not in url_data or 'url' not in url_data:
1019                     continue
1020                 format_id = url_data['itag'][0]
1021                 url = url_data['url'][0]
1022
1023                 if 'sig' in url_data:
1024                     url += '&signature=' + url_data['sig'][0]
1025                 elif 's' in url_data:
1026                     encrypted_sig = url_data['s'][0]
1027                     ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
1028
1029                     jsplayer_url_json = self._search_regex(
1030                         ASSETS_RE,
1031                         embed_webpage if age_gate else video_webpage,
1032                         'JS player URL (1)', default=None)
1033                     if not jsplayer_url_json and not age_gate:
1034                         # We need the embed website after all
1035                         if embed_webpage is None:
1036                             embed_url = proto + '://www.youtube.com/embed/%s' % video_id
1037                             embed_webpage = self._download_webpage(
1038                                 embed_url, video_id, 'Downloading embed webpage')
1039                         jsplayer_url_json = self._search_regex(
1040                             ASSETS_RE, embed_webpage, 'JS player URL')
1041
1042                     player_url = json.loads(jsplayer_url_json)
1043                     if player_url is None:
1044                         player_url_json = self._search_regex(
1045                             r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
1046                             video_webpage, 'age gate player URL')
1047                         player_url = json.loads(player_url_json)
1048
1049                     if self._downloader.params.get('verbose'):
1050                         if player_url is None:
1051                             player_version = 'unknown'
1052                             player_desc = 'unknown'
1053                         else:
1054                             if player_url.endswith('swf'):
1055                                 player_version = self._search_regex(
1056                                     r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
1057                                     'flash player', fatal=False)
1058                                 player_desc = 'flash player %s' % player_version
1059                             else:
1060                                 player_version = self._search_regex(
1061                                     r'html5player-([^/]+?)(?:/html5player)?\.js',
1062                                     player_url,
1063                                     'html5 player', fatal=False)
1064                                 player_desc = 'html5 player %s' % player_version
1065
1066                         parts_sizes = self._signature_cache_id(encrypted_sig)
1067                         self.to_screen('{%s} signature length %s, %s' %
1068                                        (format_id, parts_sizes, player_desc))
1069
1070                     signature = self._decrypt_signature(
1071                         encrypted_sig, video_id, player_url, age_gate)
1072                     url += '&signature=' + signature
1073                 if 'ratebypass' not in url:
1074                     url += '&ratebypass=yes'
1075                 url_map[format_id] = url
1076             formats = _map_to_format_list(url_map)
1077         elif video_info.get('hlsvp'):
1078             manifest_url = video_info['hlsvp'][0]
1079             url_map = self._extract_from_m3u8(manifest_url, video_id)
1080             formats = _map_to_format_list(url_map)
1081         else:
1082             raise ExtractorError('no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
1083
1084         # Look for the DASH manifest
1085         if self._downloader.params.get('youtube_include_dash_manifest', True):
1086             dash_mpd = video_info.get('dashmpd')
1087             if dash_mpd:
1088                 dash_manifest_url = dash_mpd[0]
1089                 try:
1090                     dash_formats = self._parse_dash_manifest(
1091                         video_id, dash_manifest_url, player_url, age_gate)
1092                 except (ExtractorError, KeyError) as e:
1093                     self.report_warning(
1094                         'Skipping DASH manifest: %r' % e, video_id)
1095                 else:
1096                     # Hide the formats we found through non-DASH
1097                     dash_keys = set(df['format_id'] for df in dash_formats)
1098                     for f in formats:
1099                         if f['format_id'] in dash_keys:
1100                             f['format_id'] = 'nondash-%s' % f['format_id']
1101                             f['preference'] = f.get('preference', 0) - 10000
1102                     formats.extend(dash_formats)
1103
1104         # Check for malformed aspect ratio
1105         stretched_m = re.search(
1106             r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
1107             video_webpage)
1108         if stretched_m:
1109             ratio = float(stretched_m.group('w')) / float(stretched_m.group('h'))
1110             for f in formats:
1111                 if f.get('vcodec') != 'none':
1112                     f['stretched_ratio'] = ratio
1113
1114         self._sort_formats(formats)
1115
1116         return {
1117             'id': video_id,
1118             'uploader': video_uploader,
1119             'uploader_id': video_uploader_id,
1120             'upload_date': upload_date,
1121             'title': video_title,
1122             'thumbnail': video_thumbnail,
1123             'description': video_description,
1124             'categories': video_categories,
1125             'subtitles': video_subtitles,
1126             'duration': video_duration,
1127             'age_limit': 18 if age_gate else 0,
1128             'annotations': video_annotations,
1129             'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
1130             'view_count': view_count,
1131             'like_count': like_count,
1132             'dislike_count': dislike_count,
1133             'average_rating': float_or_none(video_info.get('avg_rating', [None])[0]),
1134             'formats': formats,
1135         }
1136
1137
1138 class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
    # Description string for this extractor
    IE_DESC = 'YouTube.com playlists'
    # Verbose-mode pattern with two alternatives: a youtube.com playlist URL
    # (playlist id in group 1) or a bare prefixed playlist id (group 2).
    _VALID_URL = r"""(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        youtube\.com/
                        (?:
                           (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/videoseries)
                           \? (?:.*?&)*? (?:p|a|list)=
                        |  p/
                        )
                        (
                            (?:PL|LL|EC|UU|FL|RD)?[0-9A-Za-z-_]{10,}
                            # Top tracks, they can also include dots
                            |(?:MC)[\w\.]*
                        )
                        .*
                     |
                        ((?:PL|LL|EC|UU|FL|RD)[0-9A-Za-z-_]{10,})
                     )"""
    # Playlist page URL; %s is filled with the playlist id
    _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
    # Captures the video id and playlist index from a watch link
    # NOTE(review): presumably applied to the playlist page HTML - its use
    # is outside the visible part of the file, confirm there
    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&amp;[^"]*?index=(?P<index>\d+)'
    IE_NAME = 'youtube:playlist'
1161     _TESTS = [{
1162         'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
1163         'info_dict': {
1164             'title': 'ytdl test PL',
1165             'id': 'PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
1166         },
1167         'playlist_count': 3,
1168     }, {
1169         'url': 'https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
1170         'info_dict': {
1171             'id': 'PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
1172             'title': 'YDL_Empty_List',
1173         },
1174         'playlist_count': 0,
1175     }, {
1176         'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
1177         'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
1178         'info_dict': {
1179             'title': '29C3: Not my department',
1180             'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
1181         },
1182         'playlist_count': 95,
1183     }, {
1184         'note': 'issue #673',
1185         'url': 'PLBB231211A4F62143',
1186         'info_dict': {
1187             'title': '[OLD]Team Fortress 2 (Class-based LP)',
1188             'id': 'PLBB231211A4F62143',
1189         },
1190         'playlist_mincount': 26,
1191     }, {
1192         'note': 'Large playlist',
1193         'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
1194         'info_dict': {
1195             'title': 'Uploads from Cauchemar',
1196             'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
1197         },
1198         'playlist_mincount': 799,
1199     }, {
1200         'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
1201         'info_dict': {
1202             'title': 'YDL_safe_search',
1203             'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
1204         },
1205         'playlist_count': 2,
1206     }, {
1207         'note': 'embedded',
1208         'url': 'http://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
1209         'playlist_count': 4,
1210         'info_dict': {
1211             'title': 'JODA15',
1212             'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
1213         }
1214     }, {
1215         'note': 'Embedded SWF player',
1216         'url': 'http://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
1217         'playlist_count': 4,
1218         'info_dict': {
1219             'title': 'JODA7',
1220             'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ',
1221         }
1222     }, {
1223         'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
1224         'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
1225         'info_dict': {
1226             'title': 'Uploads from Interstellar Movie',
1227             'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
1228         },
1229         'playlist_mincout': 21,
1230     }]
1231
    def _real_initialize(self):
        """Attempt to log in to YouTube via the inherited ``_login``."""
        self._login()
1234
1235     def _ids_to_results(self, ids):
1236         return [
1237             self.url_result(vid_id, 'Youtube', video_id=vid_id)
1238             for vid_id in ids]
1239
1240     def _extract_mix(self, playlist_id):
1241         # The mixes are generated from a a single video
1242         # the id of the playlist is just 'RD' + video_id
1243         url = 'https://youtube.com/watch?v=%s&list=%s' % (playlist_id[-11:], playlist_id)
1244         webpage = self._download_webpage(
1245             url, playlist_id, 'Downloading Youtube mix')
1246         search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage)
1247         title_span = (
1248             search_title('playlist-title') or
1249             search_title('title long-title') or
1250             search_title('title'))
1251         title = clean_html(title_span)
1252         ids = orderedSet(re.findall(
1253             r'''(?xs)data-video-username=".*?".*?
1254                        href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id),
1255             webpage))
1256         url_results = self._ids_to_results(ids)
1257
1258         return self.playlist_result(url_results, playlist_id, title)
1259
1260     def _real_extract(self, url):
1261         # Extract playlist id
1262         mobj = re.match(self._VALID_URL, url)
1263         if mobj is None:
1264             raise ExtractorError('Invalid URL: %s' % url)
1265         playlist_id = mobj.group(1) or mobj.group(2)
1266
1267         # Check if it's a video-specific URL
1268         query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
1269         if 'v' in query_dict:
1270             video_id = query_dict['v'][0]
1271             if self._downloader.params.get('noplaylist'):
1272                 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
1273                 return self.url_result(video_id, 'Youtube', video_id=video_id)
1274             else:
1275                 self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
1276
1277         if playlist_id.startswith('RD'):
1278             # Mixes require a custom extraction process
1279             return self._extract_mix(playlist_id)
1280
1281         url = self._TEMPLATE_URL % playlist_id
1282         page = self._download_webpage(url, playlist_id)
1283         more_widget_html = content_html = page
1284
1285         # Check if the playlist exists or is private
1286         if re.search(r'<div class="yt-alert-message">[^<]*?(The|This) playlist (does not exist|is private)[^<]*?</div>', page) is not None:
1287             raise ExtractorError(
1288                 'The playlist doesn\'t exist or is private, use --username or '
1289                 '--netrc to access it.',
1290                 expected=True)
1291
1292         # Extract the video ids from the playlist pages
1293         ids = []
1294
1295         for page_num in itertools.count(1):
1296             matches = re.finditer(self._VIDEO_RE, content_html)
1297             # We remove the duplicates and the link with index 0
1298             # (it's not the first video of the playlist)
1299             new_ids = orderedSet(m.group('id') for m in matches if m.group('index') != '0')
1300             ids.extend(new_ids)
1301
1302             mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
1303             if not mobj:
1304                 break
1305
1306             more = self._download_json(
1307                 'https://youtube.com/%s' % mobj.group('more'), playlist_id,
1308                 'Downloading page #%s' % page_num,
1309                 transform_source=uppercase_escape)
1310             content_html = more['content_html']
1311             if not content_html.strip():
1312                 # Some webpages show a "Load more" button but they don't
1313                 # have more videos
1314                 break
1315             more_widget_html = more['load_more_widget_html']
1316
1317         playlist_title = self._html_search_regex(
1318             r'(?s)<h1 class="pl-header-title[^"]*">\s*(.*?)\s*</h1>',
1319             page, 'title')
1320
1321         url_results = self._ids_to_results(ids)
1322         return self.playlist_result(url_results, playlist_id, playlist_title)
1323
1324
class YoutubeChannelIE(InfoExtractor):
    """Extractor for the videos listed on a channel's ``/videos`` page."""
    IE_DESC = 'YouTube.com channels'
    _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/(?P<id>[0-9A-Za-z_-]+)'
    IE_NAME = 'youtube:channel'
    _TESTS = [{
        'note': 'paginated channel',
        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
        'playlist_mincount': 91,
        'info_dict': {
            'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
        }
    }]

    def extract_videos_from_page(self, page):
        """Return the video ids linked from ``page``.

        Ids are returned in the order of their first occurrence and each
        id appears only once.
        """
        ids_in_page = []
        # Track seen ids in a set so the duplicate check does not rescan
        # the result list for every match.
        seen = set()
        for mobj in re.finditer(r'href="/watch\?v=([0-9A-Za-z_-]+)&?', page):
            video_id = mobj.group(1)
            if video_id not in seen:
                seen.add(video_id)
                ids_in_page.append(video_id)
        return ids_in_page

    def _real_extract(self, url):
        """Return a playlist result with the videos of the channel in ``url``.

        Pages carrying an "autogenerated" marker class are read from the
        single downloaded page; all other channels are paged through lazily
        by following the "load more" link.
        """
        channel_id = self._match_id(url)

        url = 'https://www.youtube.com/channel/%s/videos' % channel_id
        channel_page = self._download_webpage(url, channel_id)
        autogenerated = re.search(r'''(?x)
                class="[^"]*?(?:
                    channel-header-autogenerated-label|
                    yt-channel-title-autogenerated
                )[^"]*"''', channel_page) is not None

        if autogenerated:
            # The videos are contained in a single page
            # the ajax pages can't be used, they are empty
            video_ids = self.extract_videos_from_page(channel_page)
            entries = [
                self.url_result(video_id, 'Youtube', video_id=video_id)
                for video_id in video_ids]
            return self.playlist_result(entries, channel_id)

        def _entries():
            # Generator: yields the entries of the first page, then those
            # of each continuation page until no "load more" link is left.
            more_widget_html = content_html = channel_page
            for pagenum in itertools.count(1):

                ids_in_page = self.extract_videos_from_page(content_html)
                for video_id in ids_in_page:
                    yield self.url_result(
                        video_id, 'Youtube', video_id=video_id)

                mobj = re.search(
                    r'data-uix-load-more-href="/?(?P<more>[^"]+)"',
                    more_widget_html)
                if not mobj:
                    break

                more = self._download_json(
                    'https://youtube.com/%s' % mobj.group('more'), channel_id,
                    'Downloading page #%s' % (pagenum + 1),
                    transform_source=uppercase_escape)
                content_html = more['content_html']
                more_widget_html = more['load_more_widget_html']

        return self.playlist_result(_entries(), channel_id)
1389
1390
class YoutubeUserIE(InfoExtractor):
    """Extractor for a user's uploads, listed through the GData uploads feed."""
    IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/users/%s'
    _GDATA_PAGE_SIZE = 50
    _GDATA_URL = 'https://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'
    IE_NAME = 'youtube:user'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheLinuxFoundation',
        'playlist_mincount': 320,
        'info_dict': {
            'title': 'TheLinuxFoundation',
        }
    }, {
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Return True only if no other ``*IE`` class in this module matches ``url``."""
        # Don't return True if the url can be extracted with another youtube
        # extractor: the regex is too permissive and it would match.
        other_ies = (
            klass for (name, klass) in globals().items()
            if name.endswith('IE') and klass is not cls)
        if any(ie.suitable(url) for ie in other_ies):
            return False
        return super(YoutubeUserIE, cls).suitable(url)

    def _real_extract(self, url):
        """Return a lazily paged playlist of the user's uploaded videos."""
        username = self._match_id(url)

        # Download video ids using YouTube Data API. Result size per
        # query is limited (currently to 50 videos) so we need to query
        # page by page until there are no video ids - it means we got
        # all of them.

        def download_page(pagenum):
            # Generator for one feed page; ``pagenum`` is zero-based while
            # the API's start-index is one-based.
            start_index = pagenum * self._GDATA_PAGE_SIZE + 1
            # Last index requested by this page (inclusive)
            end_index = start_index + self._GDATA_PAGE_SIZE - 1

            gdata_url = self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index)
            page = self._download_webpage(
                gdata_url, username,
                'Downloading video ids from %d to %d' % (
                    start_index, end_index))

            try:
                response = json.loads(page)
            except ValueError as err:
                raise ExtractorError('Invalid JSON in API response: ' + compat_str(err))
            if 'entry' not in response['feed']:
                # A feed without entries ends the page (nothing is yielded)
                return

            # Extract video identifiers
            entries = response['feed']['entry']
            for entry in entries:
                title = entry['title']['$t']
                # The video id is the last path component of the entry id
                video_id = entry['id']['$t'].split('/')[-1]
                yield {
                    '_type': 'url',
                    'url': video_id,
                    'ie_key': 'Youtube',
                    'id': video_id,
                    'title': title,
                }
        url_results = OnDemandPagedList(download_page, self._GDATA_PAGE_SIZE)

        return self.playlist_result(url_results, playlist_title=username)
1459
1460
class YoutubeSearchIE(SearchInfoExtractor):
    """Search extractor that pages through the GData videos feed."""
    IE_DESC = 'YouTube.com searches'
    _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc'
    _MAX_RESULTS = 1000
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query.

        Pages of 50 results are requested until ``n`` ids were collected or
        the total reported by the API is reached. Raises ExtractorError when
        a response carries no 'items'.
        """
        PAGE_SIZE = 50

        collected_ids = []
        limit = n
        pagenum = 0
        while (PAGE_SIZE * pagenum) < limit:
            quoted_query = compat_urllib_parse.quote_plus(query.encode('utf-8'))
            start_index = (PAGE_SIZE * pagenum) + 1
            result_url = self._API_URL % (quoted_query, start_index)
            data_json = self._download_webpage(
                result_url, video_id='query "%s"' % query,
                note='Downloading page %s' % (pagenum + 1),
                errnote='Unable to download API page')
            api_response = json.loads(data_json)['data']

            if 'items' not in api_response:
                raise ExtractorError(
                    '[youtube] No video results', expected=True)

            page_ids = [video['id'] for video in api_response['items']]
            collected_ids.extend(page_ids)

            # Never ask for more than the API says is available
            limit = min(n, api_response['totalItems'])
            pagenum += 1

        # The last page may overshoot the requested amount
        if len(collected_ids) > n:
            collected_ids = collected_ids[:n]

        videos = []
        for video_id in collected_ids:
            videos.append(
                self.url_result(video_id, 'Youtube', video_id=video_id))
        return self.playlist_result(videos, query)
1502
1503
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Variant of YoutubeSearchIE whose API URL adds ``orderby=published``."""
    IE_DESC = 'YouTube.com searches, newest videos first'
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    _SEARCH_KEY = 'ytsearchdate'
    _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc&orderby=published'
1509
1510
class YoutubeSearchURLIE(InfoExtractor):
    """Extractor for the entries shown on a YouTube search results URL."""
    IE_DESC = 'YouTube.com search URLs'
    IE_NAME = 'youtube:search_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?search_query=(?P<query>[^&]+)(?:[&]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }]

    def _real_extract(self, url):
        """Return a playlist, titled with the decoded query, of the listed results."""
        url_match = re.match(self._VALID_URL, url)
        query = compat_urllib_parse.unquote_plus(url_match.group('query'))

        webpage = self._download_webpage(url, query)
        # Only the result list section of the page is scanned for entries
        result_code = self._search_regex(
            r'(?s)<ol class="item-section"(.*?)</ol>', webpage, 'result HTML')

        entries = []
        # One <h3 class="yt-lockup-title"> block per search result
        for part_code in re.findall(
                r'(?s)<h3 class="yt-lockup-title">(.*?)</h3>', result_code):
            part_title = self._html_search_regex(
                [r'(?s)title="([^"]+)"', r'>([^<]+)</a>'], part_code, 'item title', fatal=False)
            part_url_snippet = self._html_search_regex(
                r'(?s)href="([^"]+)"', part_code, 'item URL')
            entry = {
                '_type': 'url',
                'url': compat_urlparse.urljoin(
                    'https://www.youtube.com/', part_url_snippet),
                'title': part_title,
            }
            entries.append(entry)

        playlist = {
            '_type': 'playlist',
            'entries': entries,
            'title': query,
        }
        return playlist
1552
1553
class YoutubeShowIE(InfoExtractor):
    """Extractor for show pages; every linked playlist becomes one entry."""
    IE_DESC = 'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://www\.youtube\.com/show/(?P<id>[^?#]*)'
    IE_NAME = 'youtube:show'
    _TESTS = [{
        'url': 'http://www.youtube.com/show/airdisasters',
        'playlist_mincount': 3,
        'info_dict': {
            'id': 'airdisasters',
            'title': 'Air Disasters',
        }
    }]

    def _real_extract(self, url):
        """Return a playlist whose entries are the season playlists of the show."""
        playlist_id = re.match(self._VALID_URL, url).group('id')
        webpage = self._download_webpage(
            url, playlist_id, 'Downloading show webpage')

        # There's one playlist for each season of the show
        season_paths = re.findall(r'href="(/playlist\?list=.*?)"', webpage)
        self.to_screen('%s: Found %s seasons' % (playlist_id, len(season_paths)))

        entries = []
        for season_path in season_paths:
            entries.append(self.url_result(
                'https://www.youtube.com' + season_path, 'YoutubePlaylist'))

        title = self._og_search_title(webpage, fatal=False)

        show = {
            '_type': 'playlist',
            'id': playlist_id,
            'title': title,
            'entries': entries,
        }
        return show
1588
1589
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Common base for the extractors that read their videos from
    http://www.youtube.com/feed_ajax
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
    """
    _LOGIN_REQUIRED = True
    # use action_load_personal_feed instead of action_load_system_feed
    _PERSONAL_FEED = False

    @property
    def _FEED_TEMPLATE(self):
        """URL template of the feed; the remaining ``%s`` is the paging value."""
        if self._PERSONAL_FEED:
            action = 'action_load_personal_feed'
        else:
            action = 'action_load_system_feed'
        return 'https://www.youtube.com/feed_ajax?%s=1&feed_name=%s&paging=%%s' % (action, self._FEED_NAME)

    @property
    def IE_NAME(self):
        """Extractor name derived from the feed name."""
        return 'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        """Perform the login step by delegating to ``_login``."""
        self._login()

    def _real_extract(self, url):
        """Collect the videos of every feed page into one playlist result."""
        feed_entries = []
        paging = 0
        page_num = 1
        while True:
            info = self._download_json(
                self._FEED_TEMPLATE % paging,
                '%s feed' % self._FEED_NAME,
                'Downloading page %s' % page_num,
                transform_source=uppercase_escape)
            feed_html = info.get('feed_html') or info.get('content_html')
            # Without a dedicated widget, look for the paging link in the
            # feed HTML itself
            load_more_widget_html = info.get('load_more_widget_html') or feed_html

            page_ids = orderedSet(
                m.group(1)
                for m in re.finditer(r'"/watch\?v=(.*?)["&]', feed_html))
            for video_id in page_ids:
                feed_entries.append(
                    self.url_result(video_id, 'Youtube', video_id=video_id))

            paging_match = re.search(
                r'data-uix-load-more-href="/?[^"]+paging=(?P<paging>\d+)',
                load_more_widget_html)
            if paging_match is None:
                break
            paging = paging_match.group('paging')
            page_num += 1
        return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE)
1637
1638
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor configured for the 'recommended' feed."""
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = 'Youtube Recommended videos'
    _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?'
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
1644
1645
class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor):
    """Feed extractor configured for the personal 'watch_later' feed."""
    _FEED_NAME = 'watch_later'
    _PERSONAL_FEED = True
    _PLAYLIST_TITLE = 'Youtube Watch Later'
    _VALID_URL = r'https?://www\.youtube\.com/feed/watch_later|:ytwatchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
1652
1653
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor configured for the personal 'history' feed."""
    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
    # Raw string: the pattern contains regex escapes (``\.``) that are not
    # valid escape sequences in a normal string literal.
    _VALID_URL = r'https?://www\.youtube\.com/feed/history|:ythistory'
    _FEED_NAME = 'history'
    _PERSONAL_FEED = True
    _PLAYLIST_TITLE = 'Youtube Watch History'
1660
1661
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Extractor that hands the favourites playlist over to YoutubePlaylist."""
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
    _LOGIN_REQUIRED = True

    def _real_extract(self, url):
        """Find the favourites playlist id on the page and return a url result for it."""
        favourites_page = self._download_webpage(
            'https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        # The playlist id is taken from the first ``list=`` parameter found
        playlist_id = self._search_regex(
            r'list=(.+?)["&]', favourites_page, 'favourites playlist id')
        return self.url_result(playlist_id, 'YoutubePlaylist')
1672
1673
class YoutubeSubscriptionsIE(YoutubePlaylistIE):
    """Extractor for the videos of the subscriptions feed page."""
    IE_NAME = 'youtube:subscriptions'
    IE_DESC = 'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
    _TESTS = []

    def _real_extract(self, url):
        """Return a playlist with the videos found on all subscription feed pages."""
        title = 'Youtube Subscriptions'
        page = self._download_webpage('https://www.youtube.com/feed/subscriptions', title)

        # The extraction process is the same as for playlists, but the regex
        # for the video ids doesn't contain an index
        ids = []
        content_html = page
        more_widget_html = page
        page_num = 1
        while True:
            found = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)
            ids.extend(orderedSet(found))

            more_match = re.search(
                r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
            if more_match is None:
                break

            more = self._download_json(
                'https://youtube.com/%s' % more_match.group('more'), title,
                'Downloading page #%s' % page_num,
                transform_source=uppercase_escape)
            content_html = more['content_html']
            more_widget_html = more['load_more_widget_html']
            page_num += 1

        playlist = {
            '_type': 'playlist',
            'title': title,
            'entries': self._ids_to_results(ids),
        }
        return playlist
1710
1711
class YoutubeTruncatedURLIE(InfoExtractor):
    """Matches watch/attribution URLs that end early and reports a helpful error."""
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'http://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'http://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError hinting at an unquoted URL."""
        hint = (
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like  youtube-dl '
            '"http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply  youtube-dl BaW_jenozKc  .')
        raise ExtractorError(hint, expected=True)
1755
1756
class YoutubeTruncatedIDIE(InfoExtractor):
    """Matches watch URLs whose video id has 1 to 10 characters and reports an error."""
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the incomplete id."""
        video_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url)
        raise ExtractorError(message, expected=True)