_ Git - youtube-dl/blob - youtube_dl/extractor/youtube.py

   1 # coding: utf-8
   2
   3 from __future__ import unicode_literals
   4
   5
   6 import itertools
   7 import json
   8 import os.path
   9 import re
  10 import traceback
  11
  12 from .common import InfoExtractor, SearchInfoExtractor
  13 from .subtitles import SubtitlesInfoExtractor
  14 from ..jsinterp import JSInterpreter
  15 from ..swfinterp import SWFInterpreter
  16 from ..utils import (
  17     compat_chr,
  18     compat_parse_qs,
  19     compat_urllib_parse,
  20     compat_urllib_request,
  21     compat_urlparse,
  22     compat_str,
  23
  24     clean_html,
  25     get_element_by_id,
  26     get_element_by_attribute,
  27     ExtractorError,
  28     int_or_none,
  29     OnDemandPagedList,
  30     unescapeHTML,
  31     unified_strdate,
  32     orderedSet,
  33     uppercase_escape,
  34 )
  35
  36 class YoutubeBaseInfoExtractor(InfoExtractor):
  37     """Provide base functions for Youtube extractors"""
  38     _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
  39     _TWOFACTOR_URL = 'https://accounts.google.com/SecondFactor'
  40     _LANG_URL = r'https://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
  41     _AGE_URL = 'https://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
  42     _NETRC_MACHINE = 'youtube'
  43     # If True it will raise an error if no login info is provided
  44     _LOGIN_REQUIRED = False
  45
  46     def _set_language(self):
  47         return bool(self._download_webpage(
  48             self._LANG_URL, None,
  49             note='Setting language', errnote='unable to set language',
  50             fatal=False))
  51
  52     def _login(self):
  53         """
  54         Attempt to log in to YouTube.
  55         True is returned if successful or skipped.
  56         False is returned if login failed.
  57
  58         If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
  59         """
  60         (username, password) = self._get_login_info()
  61         # No authentication to be performed
  62         if username is None:
  63             if self._LOGIN_REQUIRED:
  64                 raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
  65             return True
  66
  67         login_page = self._download_webpage(
  68             self._LOGIN_URL, None,
  69             note='Downloading login page',
  70             errnote='unable to fetch login page', fatal=False)
  71         if login_page is False:
  72             return
  73
  74         galx = self._search_regex(r'(?s)<input.+?name="GALX".+?value="(.+?)"',
  75                                   login_page, 'Login GALX parameter')
  76
  77         # Log in
  78         login_form_strs = {
  79                 'continue': 'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
  80                 'Email': username,
  81                 'GALX': galx,
  82                 'Passwd': password,
  83
  84                 'PersistentCookie': 'yes',
  85                 '_utf8': '霱',
  86                 'bgresponse': 'js_disabled',
  87                 'checkConnection': '',
  88                 'checkedDomains': 'youtube',
  89                 'dnConn': '',
  90                 'pstMsg': '0',
  91                 'rmShown': '1',
  92                 'secTok': '',
  93                 'signIn': 'Sign in',
  94                 'timeStmp': '',
  95                 'service': 'youtube',
  96                 'uilel': '3',
  97                 'hl': 'en_US',
  98         }
  99
 100         # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
 101         # chokes on unicode
 102         login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k,v in login_form_strs.items())
 103         login_data = compat_urllib_parse.urlencode(login_form).encode('ascii')
 104
 105         req = compat_urllib_request.Request(self._LOGIN_URL, login_data)
 106         login_results = self._download_webpage(
 107             req, None,
 108             note='Logging in', errnote='unable to log in', fatal=False)
 109         if login_results is False:
 110             return False
 111
 112         if re.search(r'id="errormsg_0_Passwd"', login_results) is not None:
 113             raise ExtractorError('Please use your account password and a two-factor code instead of an application-specific password.', expected=True)
 114
 115         # Two-Factor
 116         # TODO add SMS and phone call support - these require making a request and then prompting the user
 117
 118         if re.search(r'(?i)<form[^>]* id="gaia_secondfactorform"', login_results) is not None:
 119             tfa_code = self._get_tfa_info()
 120
 121             if tfa_code is None:
 122                 self._downloader.report_warning('Two-factor authentication required. Provide it with --twofactor <code>')
 123                 self._downloader.report_warning('(Note that only TOTP (Google Authenticator App) codes work at this time.)')
 124                 return False
 125
 126             # Unlike the first login form, secTok and timeStmp are both required for the TFA form
 127
 128             match = re.search(r'id="secTok"\n\s+value=\'(.+)\'/>', login_results, re.M | re.U)
 129             if match is None:
 130                 self._downloader.report_warning('Failed to get secTok - did the page structure change?')
 131             secTok = match.group(1)
 132             match = re.search(r'id="timeStmp"\n\s+value=\'(.+)\'/>', login_results, re.M | re.U)
 133             if match is None:
 134                 self._downloader.report_warning('Failed to get timeStmp - did the page structure change?')
 135             timeStmp = match.group(1)
 136
 137             tfa_form_strs = {
 138                 'continue': 'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
 139                 'smsToken': '',
 140                 'smsUserPin': tfa_code,
 141                 'smsVerifyPin': 'Verify',
 142
 143                 'PersistentCookie': 'yes',
 144                 'checkConnection': '',
 145                 'checkedDomains': 'youtube',
 146                 'pstMsg': '1',
 147                 'secTok': secTok,
 148                 'timeStmp': timeStmp,
 149                 'service': 'youtube',
 150                 'hl': 'en_US',
 151             }
 152             tfa_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k,v in tfa_form_strs.items())
 153             tfa_data = compat_urllib_parse.urlencode(tfa_form).encode('ascii')
 154
 155             tfa_req = compat_urllib_request.Request(self._TWOFACTOR_URL, tfa_data)
 156             tfa_results = self._download_webpage(
 157                 tfa_req, None,
 158                 note='Submitting TFA code', errnote='unable to submit tfa', fatal=False)
 159
 160             if tfa_results is False:
 161                 return False
 162
 163             if re.search(r'(?i)<form[^>]* id="gaia_secondfactorform"', tfa_results) is not None:
 164                 self._downloader.report_warning('Two-factor code expired. Please try again, or use a one-use backup code instead.')
 165                 return False
 166             if re.search(r'(?i)<form[^>]* id="gaia_loginform"', tfa_results) is not None:
 167                 self._downloader.report_warning('unable to log in - did the page structure change?')
 168                 return False
 169             if re.search(r'smsauth-interstitial-reviewsettings', tfa_results) is not None:
 170                 self._downloader.report_warning('Your Google account has a security notice. Please log in on your web browser, resolve the notice, and try again.')
 171                 return False
 172
 173         if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
 174             self._downloader.report_warning('unable to log in: bad username or password')
 175             return False
 176         return True
 177
 178     def _confirm_age(self):
 179         age_form = {
 180             'next_url': '/',
 181             'action_confirm': 'Confirm',
 182         }
 183         req = compat_urllib_request.Request(self._AGE_URL,
 184             compat_urllib_parse.urlencode(age_form).encode('ascii'))
 185
 186         self._download_webpage(
 187             req, None,
 188             note='Confirming age', errnote='Unable to confirm age',
 189             fatal=False)
 190
 191     def _real_initialize(self):
 192         if self._downloader is None:
 193             return
 194         if self._get_login_info()[0] is not None:
 195             if not self._set_language():
 196                 return
 197         if not self._login():
 198             return
 199         self._confirm_age()
 200
 201
class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
    """Extractor for single YouTube videos; _VALID_URL lists the accepted URL forms."""
    IE_DESC = 'YouTube.com'
    # Verbose-mode regexp: group 1 is the optional URL prefix, group 2 the
    # 11-character video ID (extract_id() returns group 2).
    _VALID_URL = r"""(?x)^
                     (
                         (?:https?://|//)                                    # http(s):// or protocol-independent URL
                         (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|
                            (?:www\.)?deturl\.com/www\.youtube\.com/|
                            (?:www\.)?pwnyoutube\.com/|
                            (?:www\.)?yourepeat\.com/|
                            tube\.majestyc\.net/|
                            youtube\.googleapis\.com/)                        # the various hostnames, with wildcard subdomains
                         (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
                         (?:                                                  # the various things that can precede the ID:
                             (?:(?:v|embed|e)/(?!videoseries))                # v/ or embed/ or e/
                             |(?:                                             # or the v= param in all its forms
                                 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)?  # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
                                 (?:\?|\#!?)                                  # the params delimiter ? or # or #!
                                 (?:.*?&)?                                    # any other preceding param (like /?s=tuff&v=xxxx)
                                 v=
                             )
                         ))
                         |youtu\.be/                                          # just youtu.be/xxxx
                         |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
                         )
                     )?                                                       # all until now is optional -> you can pass the naked ID
                     ([0-9A-Za-z_-]{11})                                      # here is it! the YouTube video ID
                     (?!.*?&list=)                                            # combined list/video URLs are handled by the playlist IE
                     (?(1).+)?                                                # if we found the ID, everything can follow
                     $"""
    # Captures the value of a next_url query parameter.
    # NOTE(review): not used in the visible part of the file - confirm where it is consumed.
    _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
    # Static properties per format id (string keys). _extract_from_m3u8() pulls
    # ids of the same shape out of "itag/<n>/" URL parts, so these are
    # presumably YouTube itags - confirm against the code consuming this table.
    # NOTE(review): 'preference' values are all negative here; presumably a
    # lower value means less preferred - verify against the format sorting code.
    _formats = {
        '5': {'ext': 'flv', 'width': 400, 'height': 240},
        '6': {'ext': 'flv', 'width': 450, 'height': 270},
        '13': {'ext': '3gp'},
        '17': {'ext': '3gp', 'width': 176, 'height': 144},
        '18': {'ext': 'mp4', 'width': 640, 'height': 360},
        '22': {'ext': 'mp4', 'width': 1280, 'height': 720},
        '34': {'ext': 'flv', 'width': 640, 'height': 360},
        '35': {'ext': 'flv', 'width': 854, 'height': 480},
        '36': {'ext': '3gp', 'width': 320, 'height': 240},
        '37': {'ext': 'mp4', 'width': 1920, 'height': 1080},
        '38': {'ext': 'mp4', 'width': 4096, 'height': 3072},
        '43': {'ext': 'webm', 'width': 640, 'height': 360},
        '44': {'ext': 'webm', 'width': 854, 'height': 480},
        '45': {'ext': 'webm', 'width': 1280, 'height': 720},
        '46': {'ext': 'webm', 'width': 1920, 'height': 1080},


        # 3d videos
        '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'preference': -20},
        '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'preference': -20},
        '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'preference': -20},
        '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'preference': -20},
        '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'preference': -20},
        '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'preference': -20},
        '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'preference': -20},

        # Apple HTTP Live Streaming
        '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'preference': -10},
        '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'preference': -10},
        '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'preference': -10},
        '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'preference': -10},
        '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'preference': -10},
        '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'preference': -10},
        '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'preference': -10},

        # DASH mp4 video
        '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '138': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},

        # Dash mp4 audio
        '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 48, 'preference': -50},
        '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 128, 'preference': -50},
        '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 256, 'preference': -50},

        # Dash webm
        '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
        '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
        '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
        '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
        '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
        '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
        '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'container': 'webm', 'vcodec': 'VP9'},
        '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},

        # Dash webm audio
        '171': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 128, 'preference': -50},
        '172': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 256, 'preference': -50},

        # Dash mov
        '298': {'ext': 'mov', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'h264'},
        '299': {'ext': 'mov', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'h264'},

        # RTMP (unnamed)
        '_rtmp': {'protocol': 'rtmp'},
    }
 312
    IE_NAME = 'youtube'
    # Sample URLs with the metadata expected for each. Entries carry 'url' and
    # 'info_dict', plus an optional 'note' and optional 'params'.
    # NOTE(review): presumably consumed by the project's test harness, which is
    # not visible from this file - confirm the exact schema there.
    _TESTS = [
        {
            'url': 'http://www.youtube.com/watch?v=BaW_jenozKc',
            'info_dict': {
                'id': 'BaW_jenozKc',
                'ext': 'mp4',
                'title': 'youtube-dl test video "\'/\\ä↭𝕐',
                'uploader': 'Philipp Hagemeister',
                'uploader_id': 'phihag',
                'upload_date': '20121002',
                'description': 'test chars:  "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
                'categories': ['Science & Technology'],
                'like_count': int,
                'dislike_count': int,
            }
        },
        {
            'url': 'http://www.youtube.com/watch?v=UxxajLWwzqY',
            'note': 'Test generic use_cipher_signature video (#897)',
            'info_dict': {
                'id': 'UxxajLWwzqY',
                'ext': 'mp4',
                'upload_date': '20120506',
                'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
                'description': 'md5:fea86fda2d5a5784273df5c7cc994d9f',
                'uploader': 'Icona Pop',
                'uploader_id': 'IconaPop',
            }
        },
        {
            'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ',
            'note': 'Test VEVO video with age protection (#956)',
            'info_dict': {
                'id': '07FYdnEawAQ',
                'ext': 'mp4',
                'upload_date': '20130703',
                'title': 'Justin Timberlake - Tunnel Vision (Explicit)',
                'description': 'md5:64249768eec3bc4276236606ea996373',
                'uploader': 'justintimberlakeVEVO',
                'uploader_id': 'justintimberlakeVEVO',
            }
        },
        {
            'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
            'note': 'Embed-only video (#1746)',
            'info_dict': {
                'id': 'yZIXLfi8CZQ',
                'ext': 'mp4',
                'upload_date': '20120608',
                'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
                'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
                'uploader': 'SET India',
                'uploader_id': 'setindia'
            }
        },
        {
            'url': 'http://www.youtube.com/watch?v=a9LDPn-MO4I',
            'note': '256k DASH audio (format 141) via DASH manifest',
            'info_dict': {
                'id': 'a9LDPn-MO4I',
                'ext': 'm4a',
                'upload_date': '20121002',
                'uploader_id': '8KVIDEO',
                'description': '',
                'uploader': '8KVIDEO',
                'title': 'UHDTV TEST 8K VIDEO.mp4'
            },
            'params': {
                'youtube_include_dash_manifest': True,
                'format': '141',
            },
        },
        # DASH manifest with encrypted signature
        {
            'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
            'info_dict': {
                'id': 'IB3lcPjvWLA',
                'ext': 'm4a',
                'title': 'Afrojack - The Spark ft. Spree Wilson',
                'description': 'md5:9717375db5a9a3992be4668bbf3bc0a8',
                'uploader': 'AfrojackVEVO',
                'uploader_id': 'AfrojackVEVO',
                'upload_date': '20131011',
            },
            'params': {
                'youtube_include_dash_manifest': True,
                'format': '141',
            },
        },
    ]
 404
 405     def __init__(self, *args, **kwargs):
 406         super(YoutubeIE, self).__init__(*args, **kwargs)
 407         self._player_cache = {}
 408
 409     def report_video_info_webpage_download(self, video_id):
 410         """Report attempt to download video info webpage."""
 411         self.to_screen('%s: Downloading video info webpage' % video_id)
 412
 413     def report_information_extraction(self, video_id):
 414         """Report attempt to extract video information."""
 415         self.to_screen('%s: Extracting video information' % video_id)
 416
 417     def report_unavailable_format(self, video_id, format):
 418         """Report extracted video URL."""
 419         self.to_screen('%s: Format %s not available' % (video_id, format))
 420
 421     def report_rtmp_download(self):
 422         """Indicate the download will use the RTMP protocol."""
 423         self.to_screen('RTMP download detected')
 424
 425     def _signature_cache_id(self, example_sig):
 426         """ Return a string representation of a signature """
 427         return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
 428
 429     def _extract_signature_function(self, video_id, player_url, example_sig):
 430         id_m = re.match(
 431             r'.*-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player)?\.(?P<ext>[a-z]+)$',
 432             player_url)
 433         if not id_m:
 434             raise ExtractorError('Cannot identify player %r' % player_url)
 435         player_type = id_m.group('ext')
 436         player_id = id_m.group('id')
 437
 438         # Read from filesystem cache
 439         func_id = '%s_%s_%s' % (
 440             player_type, player_id, self._signature_cache_id(example_sig))
 441         assert os.path.basename(func_id) == func_id
 442
 443         cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
 444         if cache_spec is not None:
 445             return lambda s: ''.join(s[i] for i in cache_spec)
 446
 447         if player_type == 'js':
 448             code = self._download_webpage(
 449                 player_url, video_id,
 450                 note='Downloading %s player %s' % (player_type, player_id),
 451                 errnote='Download of %s failed' % player_url)
 452             res = self._parse_sig_js(code)
 453         elif player_type == 'swf':
 454             urlh = self._request_webpage(
 455                 player_url, video_id,
 456                 note='Downloading %s player %s' % (player_type, player_id),
 457                 errnote='Download of %s failed' % player_url)
 458             code = urlh.read()
 459             res = self._parse_sig_swf(code)
 460         else:
 461             assert False, 'Invalid player type %r' % player_type
 462
 463         if cache_spec is None:
 464             test_string = ''.join(map(compat_chr, range(len(example_sig))))
 465             cache_res = res(test_string)
 466             cache_spec = [ord(c) for c in cache_res]
 467
 468         self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
 469         return res
 470
 471     def _print_sig_code(self, func, example_sig):
 472         def gen_sig_code(idxs):
 473             def _genslice(start, end, step):
 474                 starts = '' if start == 0 else str(start)
 475                 ends = (':%d' % (end+step)) if end + step >= 0 else ':'
 476                 steps = '' if step == 1 else (':%d' % step)
 477                 return 's[%s%s%s]' % (starts, ends, steps)
 478
 479             step = None
 480             start = '(Never used)'  # Quelch pyflakes warnings - start will be
 481                                     # set as soon as step is set
 482             for i, prev in zip(idxs[1:], idxs[:-1]):
 483                 if step is not None:
 484                     if i - prev == step:
 485                         continue
 486                     yield _genslice(start, prev, step)
 487                     step = None
 488                     continue
 489                 if i - prev in [-1, 1]:
 490                     step = i - prev
 491                     start = prev
 492                     continue
 493                 else:
 494                     yield 's[%d]' % prev
 495             if step is None:
 496                 yield 's[%d]' % i
 497             else:
 498                 yield _genslice(start, i, step)
 499
 500         test_string = ''.join(map(compat_chr, range(len(example_sig))))
 501         cache_res = func(test_string)
 502         cache_spec = [ord(c) for c in cache_res]
 503         expr_code = ' + '.join(gen_sig_code(cache_spec))
 504         signature_id_tuple = '(%s)' % (
 505             ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
 506         code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
 507                 '    return %s\n') % (signature_id_tuple, expr_code)
 508         self.to_screen('Extracted signature function:\n' + code)
 509
 510     def _parse_sig_js(self, jscode):
 511         funcname = self._search_regex(
 512             r'signature=([$a-zA-Z]+)', jscode,
 513              'Initial JS player signature function name')
 514
 515         jsi = JSInterpreter(jscode)
 516         initial_function = jsi.extract_function(funcname)
 517         return lambda s: initial_function([s])
 518
 519     def _parse_sig_swf(self, file_contents):
 520         swfi = SWFInterpreter(file_contents)
 521         TARGET_CLASSNAME = 'SignatureDecipher'
 522         searched_class = swfi.extract_class(TARGET_CLASSNAME)
 523         initial_function = swfi.extract_function(searched_class, 'decipher')
 524         return lambda s: initial_function([s])
 525
 526     def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
 527         """Turn the encrypted s field into a working signature"""
 528
 529         if player_url is None:
 530             raise ExtractorError('Cannot decrypt signature without player_url')
 531
 532         if player_url.startswith('//'):
 533             player_url = 'https:' + player_url
 534         try:
 535             player_id = (player_url, self._signature_cache_id(s))
 536             if player_id not in self._player_cache:
 537                 func = self._extract_signature_function(
 538                     video_id, player_url, s
 539                 )
 540                 self._player_cache[player_id] = func
 541             func = self._player_cache[player_id]
 542             if self._downloader.params.get('youtube_print_sig_code'):
 543                 self._print_sig_code(func, s)
 544             return func(s)
 545         except Exception as e:
 546             tb = traceback.format_exc()
 547             raise ExtractorError(
 548                 'Signature extraction failed: ' + tb, cause=e)
 549
 550     def _get_available_subtitles(self, video_id, webpage):
 551         try:
 552             sub_list = self._download_webpage(
 553                 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
 554                 video_id, note=False)
 555         except ExtractorError as err:
 556             self._downloader.report_warning('unable to download video subtitles: %s' % compat_str(err))
 557             return {}
 558         lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list)
 559
 560         sub_lang_list = {}
 561         for l in lang_list:
 562             lang = l[1]
 563             if lang in sub_lang_list:
 564                 continue
 565             params = compat_urllib_parse.urlencode({
 566                 'lang': lang,
 567                 'v': video_id,
 568                 'fmt': self._downloader.params.get('subtitlesformat', 'srt'),
 569                 'name': unescapeHTML(l[0]).encode('utf-8'),
 570             })
 571             url = 'https://www.youtube.com/api/timedtext?' + params
 572             sub_lang_list[lang] = url
 573         if not sub_lang_list:
 574             self._downloader.report_warning('video doesn\'t have subtitles')
 575             return {}
 576         return sub_lang_list
 577
 578     def _get_available_automatic_caption(self, video_id, webpage):
 579         """We need the webpage for getting the captions url, pass it as an
 580            argument to speed up the process."""
 581         sub_format = self._downloader.params.get('subtitlesformat', 'srt')
 582         self.to_screen('%s: Looking for automatic captions' % video_id)
 583         mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
 584         err_msg = 'Couldn\'t find automatic captions for %s' % video_id
 585         if mobj is None:
 586             self._downloader.report_warning(err_msg)
 587             return {}
 588         player_config = json.loads(mobj.group(1))
 589         try:
 590             args = player_config[u'args']
 591             caption_url = args[u'ttsurl']
 592             timestamp = args[u'timestamp']
 593             # We get the available subtitles
 594             list_params = compat_urllib_parse.urlencode({
 595                 'type': 'list',
 596                 'tlangs': 1,
 597                 'asrs': 1,
 598             })
 599             list_url = caption_url + '&' + list_params
 600             caption_list = self._download_xml(list_url, video_id)
 601             original_lang_node = caption_list.find('track')
 602             if original_lang_node is None or original_lang_node.attrib.get('kind') != 'asr' :
 603                 self._downloader.report_warning('Video doesn\'t have automatic captions')
 604                 return {}
 605             original_lang = original_lang_node.attrib['lang_code']
 606
 607             sub_lang_list = {}
 608             for lang_node in caption_list.findall('target'):
 609                 sub_lang = lang_node.attrib['lang_code']
 610                 params = compat_urllib_parse.urlencode({
 611                     'lang': original_lang,
 612                     'tlang': sub_lang,
 613                     'fmt': sub_format,
 614                     'ts': timestamp,
 615                     'kind': 'asr',
 616                 })
 617                 sub_lang_list[sub_lang] = caption_url + '&' + params
 618             return sub_lang_list
 619         # An extractor error can be raise by the download process if there are
 620         # no automatic captions but there are subtitles
 621         except (KeyError, ExtractorError):
 622             self._downloader.report_warning(err_msg)
 623             return {}
 624
 625     @classmethod
 626     def extract_id(cls, url):
 627         mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
 628         if mobj is None:
 629             raise ExtractorError('Invalid URL: %s' % url)
 630         video_id = mobj.group(2)
 631         return video_id
 632
 633     def _extract_from_m3u8(self, manifest_url, video_id):
 634         url_map = {}
 635         def _get_urls(_manifest):
 636             lines = _manifest.split('\n')
 637             urls = filter(lambda l: l and not l.startswith('#'),
 638                             lines)
 639             return urls
 640         manifest = self._download_webpage(manifest_url, video_id, 'Downloading formats manifest')
 641         formats_urls = _get_urls(manifest)
 642         for format_url in formats_urls:
 643             itag = self._search_regex(r'itag/(\d+?)/', format_url, 'itag')
 644             url_map[itag] = format_url
 645         return url_map
 646
 647     def _extract_annotations(self, video_id):
 648         url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id
 649         return self._download_webpage(url, video_id, note='Searching for annotations.', errnote='Unable to download video annotations.')
 650
 651     def _real_extract(self, url):
 652         proto = (
 653             'http' if self._downloader.params.get('prefer_insecure', False)
 654             else 'https')
 655
 656         # Extract original video URL from URL with redirection, like age verification, using next_url parameter
 657         mobj = re.search(self._NEXT_URL_RE, url)
 658         if mobj:
 659             url = proto + '://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/')
 660         video_id = self.extract_id(url)
 661
 662         # Get video webpage
 663         url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
 664         pref_cookies = [
 665             c for c in self._downloader.cookiejar
 666             if c.domain == '.youtube.com' and c.name == 'PREF']
 667         for pc in pref_cookies:
 668             if 'hl=' in pc.value:
 669                 pc.value = re.sub(r'hl=[^&]+', 'hl=en', pc.value)
 670             else:
 671                 if pc.value:
 672                     pc.value += '&'
 673                 pc.value += 'hl=en'
 674         video_webpage = self._download_webpage(url, video_id)
 675
 676         # Attempt to extract SWF player URL
 677         mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
 678         if mobj is not None:
 679             player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
 680         else:
 681             player_url = None
 682
 683         # Get video info
 684         self.report_video_info_webpage_download(video_id)
 685         if re.search(r'player-age-gate-content">', video_webpage) is not None:
 686             self.report_age_confirmation()
 687             age_gate = True
 688             # We simulate the access to the video from www.youtube.com/v/{video_id}
 689             # this can be viewed without login into Youtube
 690             data = compat_urllib_parse.urlencode({
 691                 'video_id': video_id,
 692                 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
 693                 'sts': self._search_regex(
 694                     r'"sts"\s*:\s*(\d+)', video_webpage, 'sts'),
 695             })
 696             video_info_url = proto + '://www.youtube.com/get_video_info?' + data
 697             video_info_webpage = self._download_webpage(video_info_url, video_id,
 698                                     note=False,
 699                                     errnote='unable to download video info webpage')
 700             video_info = compat_parse_qs(video_info_webpage)
 701         else:
 702             age_gate = False
 703             for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
 704                 video_info_url = (proto + '://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
 705                         % (video_id, el_type))
 706                 video_info_webpage = self._download_webpage(video_info_url, video_id,
 707                                         note=False,
 708                                         errnote='unable to download video info webpage')
 709                 video_info = compat_parse_qs(video_info_webpage)
 710                 if 'token' in video_info:
 711                     break
 712         if 'token' not in video_info:
 713             if 'reason' in video_info:
 714                 raise ExtractorError(
 715                     'YouTube said: %s' % video_info['reason'][0],
 716                     expected=True, video_id=video_id)
 717             else:
 718                 raise ExtractorError(
 719                     '"token" parameter not in video info for unknown reason',
 720                     video_id=video_id)
 721
 722         if 'view_count' in video_info:
 723             view_count = int(video_info['view_count'][0])
 724         else:
 725             view_count = None
 726
 727         # Check for "rental" videos
 728         if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
 729             raise ExtractorError('"rental" videos not supported')
 730
 731         # Start extracting information
 732         self.report_information_extraction(video_id)
 733
 734         # uploader
 735         if 'author' not in video_info:
 736             raise ExtractorError('Unable to extract uploader name')
 737         video_uploader = compat_urllib_parse.unquote_plus(video_info['author'][0])
 738
 739         # uploader_id
 740         video_uploader_id = None
 741         mobj = re.search(r'<link itemprop="url" href="http://www.youtube.com/(?:user|channel)/([^"]+)">', video_webpage)
 742         if mobj is not None:
 743             video_uploader_id = mobj.group(1)
 744         else:
 745             self._downloader.report_warning('unable to extract uploader nickname')
 746
 747         # title
 748         if 'title' in video_info:
 749             video_title = video_info['title'][0]
 750         else:
 751             self._downloader.report_warning('Unable to extract video title')
 752             video_title = '_'
 753
 754         # thumbnail image
 755         # We try first to get a high quality image:
 756         m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
 757                             video_webpage, re.DOTALL)
 758         if m_thumb is not None:
 759             video_thumbnail = m_thumb.group(1)
 760         elif 'thumbnail_url' not in video_info:
 761             self._downloader.report_warning('unable to extract video thumbnail')
 762             video_thumbnail = None
 763         else:   # don't panic if we can't find it
 764             video_thumbnail = compat_urllib_parse.unquote_plus(video_info['thumbnail_url'][0])
 765
 766         # upload date
 767         upload_date = None
 768         mobj = re.search(r'(?s)id="eow-date.*?>(.*?)</span>', video_webpage)
 769         if mobj is None:
 770             mobj = re.search(
 771                 r'(?s)id="watch-uploader-info".*?>.*?(?:Published|Uploaded|Streamed live) on (.*?)</strong>',
 772                 video_webpage)
 773         if mobj is not None:
 774             upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
 775             upload_date = unified_strdate(upload_date)
 776
 777         m_cat_container = self._search_regex(
 778             r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
 779             video_webpage, 'categories', fatal=False)
 780         if m_cat_container:
 781             category = self._html_search_regex(
 782                 r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
 783                 default=None)
 784             video_categories = None if category is None else [category]
 785         else:
 786             video_categories = None
 787
 788         # description
 789         video_description = get_element_by_id("eow-description", video_webpage)
 790         if video_description:
 791             video_description = re.sub(r'''(?x)
 792                 <a\s+
 793                     (?:[a-zA-Z-]+="[^"]+"\s+)*?
 794                     title="([^"]+)"\s+
 795                     (?:[a-zA-Z-]+="[^"]+"\s+)*?
 796                     class="yt-uix-redirect-link"\s*>
 797                 [^<]+
 798                 </a>
 799             ''', r'\1', video_description)
 800             video_description = clean_html(video_description)
 801         else:
 802             fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
 803             if fd_mobj:
 804                 video_description = unescapeHTML(fd_mobj.group(1))
 805             else:
 806                 video_description = ''
 807
 808         def _extract_count(count_name):
 809             count = self._search_regex(
 810                 r'id="watch-%s"[^>]*>.*?([\d,]+)\s*</span>' % re.escape(count_name),
 811                 video_webpage, count_name, default=None)
 812             if count is not None:
 813                 return int(count.replace(',', ''))
 814             return None
 815         like_count = _extract_count('like')
 816         dislike_count = _extract_count('dislike')
 817
 818         # subtitles
 819         video_subtitles = self.extract_subtitles(video_id, video_webpage)
 820
 821         if self._downloader.params.get('listsubtitles', False):
 822             self._list_available_subtitles(video_id, video_webpage)
 823             return
 824
 825         if 'length_seconds' not in video_info:
 826             self._downloader.report_warning('unable to extract video duration')
 827             video_duration = None
 828         else:
 829             video_duration = int(compat_urllib_parse.unquote_plus(video_info['length_seconds'][0]))
 830
 831         # annotations
 832         video_annotations = None
 833         if self._downloader.params.get('writeannotations', False):
 834                 video_annotations = self._extract_annotations(video_id)
 835
 836         # Decide which formats to download
 837         try:
 838             mobj = re.search(r';ytplayer\.config\s*=\s*({.*?});', video_webpage)
 839             if not mobj:
 840                 raise ValueError('Could not find vevo ID')
 841             json_code = uppercase_escape(mobj.group(1))
 842             ytplayer_config = json.loads(json_code)
 843             args = ytplayer_config['args']
 844             # Easy way to know if the 's' value is in url_encoded_fmt_stream_map
 845             # this signatures are encrypted
 846             if 'url_encoded_fmt_stream_map' not in args:
 847                 raise ValueError('No stream_map present')  # caught below
 848             re_signature = re.compile(r'[&,]s=')
 849             m_s = re_signature.search(args['url_encoded_fmt_stream_map'])
 850             if m_s is not None:
 851                 self.to_screen('%s: Encrypted signatures detected.' % video_id)
 852                 video_info['url_encoded_fmt_stream_map'] = [args['url_encoded_fmt_stream_map']]
 853             m_s = re_signature.search(args.get('adaptive_fmts', ''))
 854             if m_s is not None:
 855                 if 'adaptive_fmts' in video_info:
 856                     video_info['adaptive_fmts'][0] += ',' + args['adaptive_fmts']
 857                 else:
 858                     video_info['adaptive_fmts'] = [args['adaptive_fmts']]
 859         except ValueError:
 860             pass
 861
 862         def _map_to_format_list(urlmap):
 863             formats = []
 864             for itag, video_real_url in urlmap.items():
 865                 dct = {
 866                     'format_id': itag,
 867                     'url': video_real_url,
 868                     'player_url': player_url,
 869                 }
 870                 if itag in self._formats:
 871                     dct.update(self._formats[itag])
 872                 formats.append(dct)
 873             return formats
 874
 875         if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
 876             self.report_rtmp_download()
 877             formats = [{
 878                 'format_id': '_rtmp',
 879                 'protocol': 'rtmp',
 880                 'url': video_info['conn'][0],
 881                 'player_url': player_url,
 882             }]
 883         elif len(video_info.get('url_encoded_fmt_stream_map', [])) >= 1 or len(video_info.get('adaptive_fmts', [])) >= 1:
 884             encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts',[''])[0]
 885             if 'rtmpe%3Dyes' in encoded_url_map:
 886                 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
 887             url_map = {}
 888             for url_data_str in encoded_url_map.split(','):
 889                 url_data = compat_parse_qs(url_data_str)
 890                 if 'itag' not in url_data or 'url' not in url_data:
 891                     continue
 892                 format_id = url_data['itag'][0]
 893                 url = url_data['url'][0]
 894
 895                 if 'sig' in url_data:
 896                     url += '&signature=' + url_data['sig'][0]
 897                 elif 's' in url_data:
 898                     encrypted_sig = url_data['s'][0]
 899
 900                     if not age_gate:
 901                         jsplayer_url_json = self._search_regex(
 902                             r'"assets":.+?"js":\s*("[^"]+")',
 903                             video_webpage, 'JS player URL')
 904                         player_url = json.loads(jsplayer_url_json)
 905                     if player_url is None:
 906                         player_url_json = self._search_regex(
 907                             r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
 908                             video_webpage, 'age gate player URL')
 909                         player_url = json.loads(player_url_json)
 910
 911                     if self._downloader.params.get('verbose'):
 912                         if player_url is None:
 913                             player_version = 'unknown'
 914                             player_desc = 'unknown'
 915                         else:
 916                             if player_url.endswith('swf'):
 917                                 player_version = self._search_regex(
 918                                     r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
 919                                     'flash player', fatal=False)
 920                                 player_desc = 'flash player %s' % player_version
 921                             else:
 922                                 player_version = self._search_regex(
 923                                     r'html5player-([^/]+?)(?:/html5player)?\.js',
 924                                     player_url,
 925                                     'html5 player', fatal=False)
 926                                 player_desc = 'html5 player %s' % player_version
 927
 928                         parts_sizes = self._signature_cache_id(encrypted_sig)
 929                         self.to_screen('{%s} signature length %s, %s' %
 930                             (format_id, parts_sizes, player_desc))
 931
 932                     signature = self._decrypt_signature(
 933                         encrypted_sig, video_id, player_url, age_gate)
 934                     url += '&signature=' + signature
 935                 if 'ratebypass' not in url:
 936                     url += '&ratebypass=yes'
 937                 url_map[format_id] = url
 938             formats = _map_to_format_list(url_map)
 939         elif video_info.get('hlsvp'):
 940             manifest_url = video_info['hlsvp'][0]
 941             url_map = self._extract_from_m3u8(manifest_url, video_id)
 942             formats = _map_to_format_list(url_map)
 943         else:
 944             raise ExtractorError('no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
 945
 946         # Look for the DASH manifest
 947         if self._downloader.params.get('youtube_include_dash_manifest', True):
 948             try:
 949                 # The DASH manifest used needs to be the one from the original video_webpage.
 950                 # The one found in get_video_info seems to be using different signatures.
 951                 # However, in the case of an age restriction there won't be any embedded dashmpd in the video_webpage.
 952                 # Luckily, it seems, this case uses some kind of default signature (len == 86), so the
 953                 # combination of get_video_info and the _static_decrypt_signature() decryption fallback will work here.
 954                 if age_gate:
 955                     dash_manifest_url = video_info.get('dashmpd')[0]
 956                 else:
 957                     dash_manifest_url = ytplayer_config['args']['dashmpd']
 958                 def decrypt_sig(mobj):
 959                     s = mobj.group(1)
 960                     dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
 961                     return '/signature/%s' % dec_s
 962                 dash_manifest_url = re.sub(r'/s/([\w\.]+)', decrypt_sig, dash_manifest_url)
 963                 dash_doc = self._download_xml(
 964                     dash_manifest_url, video_id,
 965                     note='Downloading DASH manifest',
 966                     errnote='Could not download DASH manifest')
 967                 for r in dash_doc.findall('.//{urn:mpeg:DASH:schema:MPD:2011}Representation'):
 968                     url_el = r.find('{urn:mpeg:DASH:schema:MPD:2011}BaseURL')
 969                     if url_el is None:
 970                         continue
 971                     format_id = r.attrib['id']
 972                     video_url = url_el.text
 973                     filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength'))
 974                     f = {
 975                         'format_id': format_id,
 976                         'url': video_url,
 977                         'width': int_or_none(r.attrib.get('width')),
 978                         'tbr': int_or_none(r.attrib.get('bandwidth'), 1000),
 979                         'asr': int_or_none(r.attrib.get('audioSamplingRate')),
 980                         'filesize': filesize,
 981                     }
 982                     try:
 983                         existing_format = next(
 984                             fo for fo in formats
 985                             if fo['format_id'] == format_id)
 986                     except StopIteration:
 987                         f.update(self._formats.get(format_id, {}))
 988                         formats.append(f)
 989                     else:
 990                         existing_format.update(f)
 991
 992             except (ExtractorError, KeyError) as e:
 993                 self.report_warning('Skipping DASH manifest: %s' % e, video_id)
 994
 995         self._sort_formats(formats)
 996
 997         return {
 998             'id':           video_id,
 999             'uploader':     video_uploader,
1000             'uploader_id':  video_uploader_id,
1001             'upload_date':  upload_date,
1002             'title':        video_title,
1003             'thumbnail':    video_thumbnail,
1004             'description':  video_description,
1005             'categories':   video_categories,
1006             'subtitles':    video_subtitles,
1007             'duration':     video_duration,
1008             'age_limit':    18 if age_gate else 0,
1009             'annotations':  video_annotations,
1010             'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
1011             'view_count':   view_count,
1012             'like_count': like_count,
1013             'dislike_count': dislike_count,
1014             'formats':      formats,
1015         }
1016
class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
    """Extractor for YouTube playlists, given as a URL or as a bare playlist id."""
    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r"""(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        youtube\.com/
                        (?:
                           (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/videoseries)
                           \? (?:.*?&)*? (?:p|a|list)=
                        |  p/
                        )
                        (
                            (?:PL|LL|EC|UU|FL|RD)?[0-9A-Za-z-_]{10,}
                            # Top tracks, they can also include dots
                            |(?:MC)[\w\.]*
                        )
                        .*
                     |
                        ((?:PL|LL|EC|UU|FL|RD)[0-9A-Za-z-_]{10,})
                     )"""
    _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
    _MORE_PAGES_INDICATOR = r'data-link-type="next"'
    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&amp;[^"]*?index=(?P<index>\d+)'
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
        'info_dict': {
            'title': 'ytdl test PL',
        },
        'playlist_count': 3,
    }, {
        'url': 'https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
        'info_dict': {
            'title': 'YDL_Empty_List',
        },
        'playlist_count': 0,
    }, {
        'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
        'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
        'info_dict': {
            'title': '29C3: Not my department',
        },
        'playlist_count': 95,
    }, {
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
        },
        'playlist_mincount': 26,
    }, {
        'note': 'Large playlist',
        'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
        'info_dict': {
            'title': 'Uploads from Cauchemar',
        },
        'playlist_mincount': 799,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
        },
        'playlist_count': 2,
    }, {
        'note': 'embedded',
        'url': 'http://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
        }
    }, {
        'note': 'Embedded SWF player',
        'url': 'http://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA7',
        }
    }]

    def _real_initialize(self):
        """Log in before any extraction is attempted."""
        self._login()

    def _ids_to_results(self, ids):
        """Wrap every video id in a url_result handled by the 'Youtube' extractor."""
        results = []
        for video_id in ids:
            results.append(
                self.url_result(video_id, 'Youtube', video_id=video_id))
        return results

    def _extract_mix(self, playlist_id):
        """Return the playlist result for the mix identified by ``playlist_id``."""
        # A mix is generated from a single video: the id of the playlist is
        # just 'RD' + video_id, so the last 11 characters are the video id
        seed_video_id = playlist_id[-11:]
        url = 'https://youtube.com/watch?v=%s&list=%s' % (seed_video_id, playlist_id)
        webpage = self._download_webpage(
            url, playlist_id, 'Downloading Youtube mix')

        # Use the first title container that yields something
        title_span = None
        for class_name in ('playlist-title', 'title long-title', 'title'):
            title_span = get_element_by_attribute('class', class_name, webpage)
            if title_span:
                break
        title = clean_html(title_span)

        mix_video_re = r'''(?xs)data-video-username=".*?".*?
                       href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id)
        ids = orderedSet(re.findall(mix_video_re, webpage))

        return self.playlist_result(
            self._ids_to_results(ids), playlist_id, title)

    def _real_extract(self, url):
        """Return the playlist result for ``url``.

        When the URL also names a video and 'noplaylist' is set, only a
        url_result for that video is returned.  Mixes (ids starting with
        'RD') are delegated to _extract_mix.

        Raises ExtractorError for invalid URLs, for top lists (ids starting
        with 'TL') and for playlists reported as missing or private.
        """
        # Extract playlist id: the first group matches URLs, the second one
        # bare playlist ids
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError('Invalid URL: %s' % url)
        playlist_id = mobj.group(1) or mobj.group(2)

        # Check if it's a video-specific URL
        query = compat_urlparse.urlparse(url).query
        query_dict = compat_urlparse.parse_qs(query)
        if 'v' in query_dict:
            video_id = query_dict['v'][0]
            if self._downloader.params.get('noplaylist'):
                self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
                return self.url_result(video_id, 'Youtube', video_id=video_id)
            self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))

        if playlist_id.startswith('RD'):
            # Mixes require a custom extraction process
            return self._extract_mix(playlist_id)
        if playlist_id.startswith('TL'):
            raise ExtractorError('For downloading YouTube.com top lists, use '
                'the "yttoplist" keyword, for example "youtube-dl \'yttoplist:music:Top Tracks\'"', expected=True)

        page = self._download_webpage(self._TEMPLATE_URL % playlist_id, playlist_id)

        # Check if the playlist exists or is private
        if re.search(r'<div class="yt-alert-message">[^<]*?(The|This) playlist (does not exist|is private)[^<]*?</div>', page):
            raise ExtractorError(
                'The playlist doesn\'t exist or is private, use --username or '
                '--netrc to access it.',
                expected=True)

        # Extract the video ids from the playlist pages.  The first page
        # provides both the videos and the "load more" widget; every
        # further page delivers them as two separate JSON fields.
        ids = []
        content_html = page
        more_widget_html = page
        page_num = 1
        while True:
            # We remove the duplicates and the link with index 0
            # (it's not the first video of the playlist)
            ids.extend(orderedSet(
                m.group('id')
                for m in re.finditer(self._VIDEO_RE, content_html)
                if m.group('index') != '0'))

            more_match = re.search(
                r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
            if more_match is None:
                break

            more = self._download_json(
                'https://youtube.com/%s' % more_match.group('more'), playlist_id,
                'Downloading page #%s' % page_num,
                transform_source=uppercase_escape)
            content_html = more['content_html']
            more_widget_html = more['load_more_widget_html']
            page_num += 1

        playlist_title = self._html_search_regex(
            r'(?s)<h1 class="pl-header-title[^"]*">\s*(.*?)\s*</h1>',
            page, 'title')

        return self.playlist_result(
            self._ids_to_results(ids), playlist_id, playlist_title)
1186
1187
class YoutubeTopListIE(YoutubePlaylistIE):
    """Extractor for the "yttoplist:{channel}:{list title}" pseudo URLs."""
    IE_NAME = 'youtube:toplist'
    IE_DESC = ('YouTube.com top lists, "yttoplist:{channel}:{list title}"'
        ' (Example: "yttoplist:music:Top Tracks")')
    _VALID_URL = r'yttoplist:(?P<chann>.*?):(?P<title>.*?)$'
    _TESTS = [{
        'url': 'yttoplist:music:Trending',
        'playlist_mincount': 5,
        'skip': 'Only works for logged-in users',
    }]
    # Upper bound on the downloads of the list page while waiting for one
    # that actually contains videos
    _MAX_PAGE_ATTEMPTS = 10

    def _real_extract(self, url):
        """Return the playlist result for the top list named in ``url``.

        The link to the list is looked up on the channel page, then the list
        page is downloaded until it contains video ids.

        Raises ExtractorError when no videos are found after
        _MAX_PAGE_ATTEMPTS downloads of the list page.
        """
        mobj = re.match(self._VALID_URL, url)
        channel = mobj.group('chann')
        title = mobj.group('title')
        query = compat_urllib_parse.urlencode({'title': title})
        channel_page = self._download_webpage(
            'https://www.youtube.com/%s' % channel, title)
        link = self._html_search_regex(
            r'''(?x)
                <a\s+href="([^"]+)".*?>\s*
                <span\s+class="branded-page-module-title-text">\s*
                <span[^>]*>.*?%s.*?</span>''' % re.escape(query),
            channel_page, 'list')
        url = compat_urlparse.urljoin('https://www.youtube.com/', link)

        video_re = r'data-index="\d+".*?data-video-id="([0-9A-Za-z_-]{11})"'
        ids = []
        # sometimes the webpage doesn't contain the videos
        # retry until we get them, but give up after a bounded number of
        # attempts instead of requesting the page forever
        for attempt in range(self._MAX_PAGE_ATTEMPTS):
            msg = 'Downloading Youtube top list'
            if attempt > 0:
                msg += ', retry #%d' % attempt

            webpage = self._download_webpage(url, title, msg)
            ids = orderedSet(re.findall(video_re, webpage))
            if ids:
                break
        else:
            raise ExtractorError(
                'Unable to find any videos in top list "%s"' % title)
        url_results = self._ids_to_results(ids)
        return self.playlist_result(url_results, playlist_title=title)
1229
1230
class YoutubeChannelIE(InfoExtractor):
    """Extractor for the videos of a YouTube channel."""
    IE_DESC = 'YouTube.com channels'
    _VALID_URL = r"^(?:https?://)?(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/([0-9A-Za-z_-]+)"
    _MORE_PAGES_INDICATOR = 'yt-uix-load-more'
    _MORE_PAGES_URL = 'https://www.youtube.com/c4_browse_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s'
    IE_NAME = 'youtube:channel'
    _TESTS = [{
        'note': 'paginated channel',
        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
        'playlist_mincount': 91,
    }]

    def extract_videos_from_page(self, page):
        """Return the video ids linked from ``page``.

        Ids are listed in order of first appearance, without duplicates.
        """
        ids_in_page = []
        for video_id in re.findall(r'href="/watch\?v=([0-9A-Za-z_-]+)&?', page):
            if video_id in ids_in_page:
                continue
            ids_in_page.append(video_id)
        return ids_in_page

    def _real_extract(self, url):
        """Return a playlist result with all the videos of the channel.

        Raises ExtractorError when ``url`` is not a channel URL.
        """
        # Extract channel id
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError('Invalid URL: %s' % url)
        channel_id = mobj.group(1)

        # Download channel page
        url = 'https://www.youtube.com/channel/%s/videos' % channel_id
        channel_page = self._download_webpage(url, channel_id)
        autogenerated_match = re.search(r'''(?x)
                class="[^"]*?(?:
                    channel-header-autogenerated-label|
                    yt-channel-title-autogenerated
                )[^"]*"''', channel_page)

        if autogenerated_match is not None:
            # The videos are contained in a single page
            # the ajax pages can't be used, they are empty
            video_ids = self.extract_videos_from_page(channel_page)
        else:
            # Download all channel pages using the json-based channel_ajax query
            video_ids = []
            pagenum = 1
            while True:
                url = self._MORE_PAGES_URL % (pagenum, channel_id)
                page = self._download_json(
                    url, channel_id, note='Downloading page #%s' % pagenum,
                    transform_source=uppercase_escape)

                video_ids.extend(
                    self.extract_videos_from_page(page['content_html']))

                # Stop once the widget no longer offers further pages
                if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']:
                    break
                pagenum += 1

        self._downloader.to_screen('[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))

        url_entries = []
        for video_id in video_ids:
            url_entries.append(
                self.url_result(video_id, 'Youtube', video_id=video_id))
        return self.playlist_result(url_entries, channel_id)
1290
1291
class YoutubeUserIE(InfoExtractor):
    """Extractor for the uploads of a YouTube user (URL or "ytuser:" keyword)."""
    IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)([A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/users/%s'
    _GDATA_PAGE_SIZE = 50
    _GDATA_URL = 'https://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'
    IE_NAME = 'youtube:user'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheLinuxFoundation',
        'playlist_mincount': 320,
        'info_dict': {
            'title': 'TheLinuxFoundation',
        }
    }, {
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Return True only if no other extractor of this module accepts `url`."""
        # Don't return True if the url can be extracted with another youtube
        # extractor: the regex is too permissive and would match those too.
        other_ies = (
            klass for (name, klass) in globals().items()
            if name.endswith('IE') and klass is not cls)
        if any(ie.suitable(url) for ie in other_ies):
            return False
        return super(YoutubeUserIE, cls).suitable(url)

    def _real_extract(self, url):
        """Return a lazily paged playlist of the user's uploads.

        Raises ExtractorError when `url` does not match _VALID_URL or when
        an API page is not valid JSON.
        """
        # Extract username
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError('Invalid URL: %s' % url)

        username = mobj.group(1)

        # Download video ids using YouTube Data API. Result size per
        # query is limited (currently to 50 videos) so we need to query
        # page by page until there are no video ids - it means we got
        # all of them.

        def download_page(pagenum):
            """Yield one url-type result per entry of the 0-based page `pagenum`."""
            # The API's start-index is 1-based
            start_index = pagenum * self._GDATA_PAGE_SIZE + 1
            # Last index covered by this page (inclusive)
            end_index = start_index + self._GDATA_PAGE_SIZE - 1

            gdata_url = self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index)
            page = self._download_webpage(
                gdata_url, username,
                'Downloading video ids from %d to %d' % (
                    start_index, end_index))

            try:
                response = json.loads(page)
            except ValueError as err:
                raise ExtractorError('Invalid JSON in API response: ' + compat_str(err))
            # A feed without entries yields nothing for this page
            if 'entry' not in response['feed']:
                return

            # Extract video identifiers
            for entry in response['feed']['entry']:
                title = entry['title']['$t']
                # The id is the last path component of the entry's id value
                video_id = entry['id']['$t'].split('/')[-1]
                yield {
                    '_type': 'url',
                    'url': video_id,
                    'ie_key': 'Youtube',
                    'id': video_id,
                    'title': title,
                }
        url_results = OnDemandPagedList(download_page, self._GDATA_PAGE_SIZE)

        return self.playlist_result(url_results, playlist_title=username)
1363
1364
class YoutubeSearchIE(SearchInfoExtractor):
    """Search extractor for the "ytsearch" keyword, backed by _API_URL."""
    IE_DESC = 'YouTube.com searches'
    _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc'
    _MAX_RESULTS = 1000
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query.

        Downloads API pages of 50 results until `n` ids were collected or
        the total reported by the API is exhausted, and returns a playlist
        result titled `query` holding at most `n` entries.

        Raises ExtractorError (expected) when the query yields no results
        at all.
        """

        video_ids = []
        pagenum = 0
        limit = n
        # Must match the max-results value hard-coded in _API_URL
        PAGE_SIZE = 50

        while (PAGE_SIZE * pagenum) < limit:
            result_url = self._API_URL % (
                compat_urllib_parse.quote_plus(query.encode('utf-8')),
                (PAGE_SIZE * pagenum) + 1)
            data_json = self._download_webpage(
                result_url, video_id='query "%s"' % query,
                note='Downloading page %s' % (pagenum + 1),
                errnote='Unable to download API page')
            data = json.loads(data_json)
            api_response = data['data']

            if 'items' not in api_response:
                if video_ids:
                    # A later page came back empty: keep the ids gathered
                    # so far instead of discarding them with an error.
                    break
                raise ExtractorError(
                    '[youtube] No video results', expected=True)

            video_ids.extend(video['id'] for video in api_response['items'])

            # Never ask for more than the API says exists
            limit = min(n, api_response['totalItems'])
            pagenum += 1

        # The last page may overshoot the requested amount
        video_ids = video_ids[:n]
        videos = [self.url_result(video_id, 'Youtube', video_id=video_id)
                  for video_id in video_ids]
        return self.playlist_result(videos, query)
1406
1407
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Search variant whose API URL adds orderby=published ("ytsearchdate")."""
    IE_DESC = 'YouTube.com searches, newest videos first'
    IE_NAME = '%s:date' % YoutubeSearchIE.IE_NAME
    _SEARCH_KEY = 'ytsearchdate'
    _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc&orderby=published'
1413
1414
class YoutubeSearchURLIE(InfoExtractor):
    """Extractor for youtube.com/results?search_query=... pages."""
    IE_NAME = 'youtube:search_url'
    IE_DESC = 'YouTube.com search URLs'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?search_query=(?P<query>[^&]+)(?:[&]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }]

    def _real_extract(self, url):
        """Scrape the result page and return its items as a playlist.

        The playlist title is the decoded search query taken from `url`.
        """
        raw_query = re.match(self._VALID_URL, url).group('query')
        query = compat_urllib_parse.unquote_plus(raw_query)

        webpage = self._download_webpage(url, query)
        # All results live inside the first "item-section" list
        result_code = self._search_regex(
            r'(?s)<ol class="item-section"(.*?)</ol>', webpage, 'result HTML')

        entries = []
        # Each result has its link and title inside a "yt-lockup-title" heading
        for part_code in re.findall(
                r'(?s)<h3 class="yt-lockup-title">(.*?)</h3>', result_code):
            # A missing title is tolerated, a missing link is not
            item_title = self._html_search_regex(
                [r'(?s)title="([^"]+)"', r'>([^<]+)</a>'],
                part_code, 'item title', fatal=False)
            item_href = self._html_search_regex(
                r'(?s)href="([^"]+)"', part_code, 'item URL')
            item_url = compat_urlparse.urljoin(
                'https://www.youtube.com/', item_href)
            entries.append({
                '_type': 'url',
                'url': item_url,
                'title': item_title,
            })

        playlist = {
            '_type': 'playlist',
            'entries': entries,
            'title': query,
        }
        return playlist
1456
1457
class YoutubeShowIE(InfoExtractor):
    """Extractor for /show/<id> pages; yields one playlist entry per season."""
    IE_NAME = 'youtube:show'
    IE_DESC = 'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://www\.youtube\.com/show/(?P<id>[^?#]*)'
    _TESTS = [{
        'url': 'http://www.youtube.com/show/airdisasters',
        'playlist_mincount': 3,
        'info_dict': {
            'id': 'airdisasters',
            'title': 'Air Disasters',
        }
    }]

    def _real_extract(self, url):
        """Return a playlist whose entries point at the season playlists."""
        playlist_id = re.match(self._VALID_URL, url).group('id')
        webpage = self._download_webpage(
            url, playlist_id, 'Downloading show webpage')

        # Every season of the show is linked as its own playlist
        season_paths = re.findall(r'href="(/playlist\?list=.*?)"', webpage)
        self.to_screen(
            '%s: Found %s seasons' % (playlist_id, len(season_paths)))

        entries = []
        for season_path in season_paths:
            season_url = 'https://www.youtube.com' + season_path
            entries.append(self.url_result(season_url, 'YoutubePlaylist'))

        # The title is optional, so a failed lookup is not fatal
        show_title = self._og_search_title(webpage, fatal=False)

        return {
            '_type': 'playlist',
            'id': playlist_id,
            'title': show_title,
            'entries': entries,
        }
1492
1493
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Common machinery for extractors reading
    http://www.youtube.com/feed_ajax
    Concrete subclasses have to provide _FEED_NAME and _PLAYLIST_TITLE.
    """
    _LOGIN_REQUIRED = True
    # When True, action_load_personal_feed is requested instead of
    # action_load_system_feed
    _PERSONAL_FEED = False

    @property
    def _FEED_TEMPLATE(self):
        """URL template of the feed; the remaining %s takes the paging value."""
        if self._PERSONAL_FEED:
            action = 'action_load_personal_feed'
        else:
            action = 'action_load_system_feed'
        return 'https://www.youtube.com/feed_ajax?%s=1&feed_name=%s&paging=%%s' % (action, self._FEED_NAME)

    @property
    def IE_NAME(self):
        """Extractor name derived from the feed name."""
        return 'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        self._login()

    def _real_extract(self, url):
        """Collect the videos of every feed page into one playlist result."""
        feed_entries = []
        paging = 0
        page_number = 1
        while True:
            info = self._download_json(
                self._FEED_TEMPLATE % paging,
                '%s feed' % self._FEED_NAME,
                'Downloading page %s' % page_number)
            # The markup is delivered under one of two keys
            feed_html = info.get('feed_html') or info.get('content_html')
            # Without a dedicated widget the feed markup itself is searched
            load_more_widget_html = info.get('load_more_widget_html') or feed_html

            ids = orderedSet(re.findall(r'"/watch\?v=(.*?)["&]', feed_html))
            for video_id in ids:
                feed_entries.append(
                    self.url_result(video_id, 'Youtube', video_id=video_id))

            # The "load more" link carries the paging value of the next page
            next_page = re.search(
                r'data-uix-load-more-href="/?[^"]+paging=(?P<paging>\d+)',
                load_more_widget_html)
            if next_page is None:
                break
            paging = next_page.group('paging')
            page_number += 1
        return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE)
1539
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the "recommended" feed ("ytrec" keyword)."""
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = 'Youtube Recommended videos'
    IE_DESC = 'YouTube.com recommended videos, "ytrec" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?'
1545
class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the "watch_later" feed ("ytwatchlater" keyword)."""
    _FEED_NAME = 'watch_later'
    _PERSONAL_FEED = True
    _PLAYLIST_TITLE = 'Youtube Watch Later'
    IE_DESC = 'Youtube watch later list, "ytwatchlater" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/watch_later|:ytwatchlater'
1552
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the "history" feed ("ythistory" keyword)."""
    IE_DESC = 'Youtube watch history, "ythistory" keyword (requires authentication)'
    # Raw string: "\." is not a valid escape in a plain string literal and
    # triggers a warning on recent Python versions. The pattern is unchanged.
    _VALID_URL = r'https?://www\.youtube\.com/feed/history|:ythistory'
    _FEED_NAME = 'history'
    _PERSONAL_FEED = True
    _PLAYLIST_TITLE = 'Youtube Watch History'
1559
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Resolve the favourites page ("ytfav" keyword) to its playlist."""
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com favourite videos, "ytfav" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
    _LOGIN_REQUIRED = True

    def _real_extract(self, url):
        """Return a url result handing the favourites list id to YoutubePlaylist."""
        favourites_page = self._download_webpage(
            'https://www.youtube.com/my_favorites',
            'Youtube Favourites videos')
        # The page references the playlist through a "list=" parameter
        list_id = self._search_regex(
            r'list=(.+?)["&]', favourites_page, 'favourites playlist id')
        return self.url_result(list_id, 'YoutubePlaylist')
1570
1571
class YoutubeSubscriptionsIE(YoutubePlaylistIE):
    """Extractor for the subscriptions feed ("ytsubs" keyword)."""
    IE_NAME = 'youtube:subscriptions'
    IE_DESC = 'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
    _TESTS = []

    def _real_extract(self, url):
        """Page through the subscriptions feed and return it as a playlist."""
        title = 'Youtube Subscriptions'
        page = self._download_webpage('https://www.youtube.com/feed/subscriptions', title)

        # Same procedure as for playlists, except that the video id regex
        # carries no index
        ids = []
        # The first round scans the initial page for both ids and the link
        content_html = page
        more_widget_html = page

        page_num = 1
        while True:
            ids.extend(orderedSet(re.findall(
                r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)))

            more_link = re.search(
                r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
            if more_link is None:
                break

            more = self._download_json(
                'https://youtube.com/%s' % more_link.group('more'), title,
                'Downloading page #%s' % page_num,
                transform_source=uppercase_escape)
            content_html = more['content_html']
            more_widget_html = more['load_more_widget_html']
            page_num += 1

        playlist = {
            '_type': 'playlist',
            'title': title,
            'entries': self._ids_to_results(ids),
        }
        return playlist
1608
1609
class YoutubeTruncatedURLIE(InfoExtractor):
    """Match watch/attribution URLs that carry no video id and report them."""
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?[^/]+/watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+
        )?$|
        (?:https?://)?(?:www\.)?youtube\.com/attribution_link\?a=[^&]+$
    '''

    _TESTS = [{
        'url': 'http://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'http://www.youtube.com/watch?',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError with a hint about quoting."""
        hint = ''.join((
            'Did you forget to quote the URL? Remember that & is a meta ',
            'character in most shells, so you want to put the URL in quotes, ',
            'like  youtube-dl ',
            '"http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" ',
            ' or simply  youtube-dl BaW_jenozKc  .',
        ))
        raise ExtractorError(hint, expected=True)