_ Git - youtube-dl/blob - youtube_dl/extractor/youtube.py

   1 # coding: utf-8
   2
   3 import collections
   4 import errno
   5 import io
   6 import itertools
   7 import json
   8 import os.path
   9 import re
  10 import socket
  11 import string
  12 import struct
  13 import traceback
  14 import xml.etree.ElementTree
  15 import zlib
  16
  17 from .common import InfoExtractor, SearchInfoExtractor
  18 from .subtitles import SubtitlesInfoExtractor
  19 from ..utils import (
  20     compat_chr,
  21     compat_http_client,
  22     compat_parse_qs,
  23     compat_urllib_error,
  24     compat_urllib_parse,
  25     compat_urllib_request,
  26     compat_str,
  27
  28     clean_html,
  29     get_element_by_id,
  30     ExtractorError,
  31     unescapeHTML,
  32     unified_strdate,
  33     orderedSet,
  34     write_json_file,
  35 )
  36
  37 class YoutubeBaseInfoExtractor(InfoExtractor):
  38     """Provide base functions for Youtube extractors"""
  39     _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
  40     _LANG_URL = r'https://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
  41     _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
  42     _NETRC_MACHINE = 'youtube'
  43     # If True it will raise an error if no login info is provided
  44     _LOGIN_REQUIRED = False
  45
  46     def report_lang(self):
  47         """Report attempt to set language."""
  48         self.to_screen(u'Setting language')
  49
  50     def _set_language(self):
  51         request = compat_urllib_request.Request(self._LANG_URL)
  52         try:
  53             self.report_lang()
  54             compat_urllib_request.urlopen(request).read()
  55         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
  56             self._downloader.report_warning(u'unable to set language: %s' % compat_str(err))
  57             return False
  58         return True
  59
  60     def _login(self):
  61         (username, password) = self._get_login_info()
  62         # No authentication to be performed
  63         if username is None:
  64             if self._LOGIN_REQUIRED:
  65                 raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True)
  66             return False
  67
  68         request = compat_urllib_request.Request(self._LOGIN_URL)
  69         try:
  70             login_page = compat_urllib_request.urlopen(request).read().decode('utf-8')
  71         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
  72             self._downloader.report_warning(u'unable to fetch login page: %s' % compat_str(err))
  73             return False
  74
  75         galx = None
  76         dsh = None
  77         match = re.search(re.compile(r'<input.+?name="GALX".+?value="(.+?)"', re.DOTALL), login_page)
  78         if match:
  79           galx = match.group(1)
  80         match = re.search(re.compile(r'<input.+?name="dsh".+?value="(.+?)"', re.DOTALL), login_page)
  81         if match:
  82           dsh = match.group(1)
  83
  84         # Log in
  85         login_form_strs = {
  86                 u'continue': u'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
  87                 u'Email': username,
  88                 u'GALX': galx,
  89                 u'Passwd': password,
  90                 u'PersistentCookie': u'yes',
  91                 u'_utf8': u'霱',
  92                 u'bgresponse': u'js_disabled',
  93                 u'checkConnection': u'',
  94                 u'checkedDomains': u'youtube',
  95                 u'dnConn': u'',
  96                 u'dsh': dsh,
  97                 u'pstMsg': u'0',
  98                 u'rmShown': u'1',
  99                 u'secTok': u'',
 100                 u'signIn': u'Sign in',
 101                 u'timeStmp': u'',
 102                 u'service': u'youtube',
 103                 u'uilel': u'3',
 104                 u'hl': u'en_US',
 105         }
 106         # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
 107         # chokes on unicode
 108         login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k,v in login_form_strs.items())
 109         login_data = compat_urllib_parse.urlencode(login_form).encode('ascii')
 110         request = compat_urllib_request.Request(self._LOGIN_URL, login_data)
 111         try:
 112             self.report_login()
 113             login_results = compat_urllib_request.urlopen(request).read().decode('utf-8')
 114             if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
 115                 self._downloader.report_warning(u'unable to log in: bad username or password')
 116                 return False
 117         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
 118             self._downloader.report_warning(u'unable to log in: %s' % compat_str(err))
 119             return False
 120         return True
 121
 122     def _confirm_age(self):
 123         age_form = {
 124                 'next_url':     '/',
 125                 'action_confirm':   'Confirm',
 126                 }
 127         request = compat_urllib_request.Request(self._AGE_URL, compat_urllib_parse.urlencode(age_form))
 128         try:
 129             self.report_age_confirmation()
 130             compat_urllib_request.urlopen(request).read().decode('utf-8')
 131         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
 132             raise ExtractorError(u'Unable to confirm age: %s' % compat_str(err))
 133         return True
 134
 135     def _real_initialize(self):
 136         if self._downloader is None:
 137             return
 138         if not self._set_language():
 139             return
 140         if not self._login():
 141             return
 142         self._confirm_age()
 143
 144
 145 class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
 146     IE_DESC = u'YouTube.com'
 147     _VALID_URL = r"""^
 148                      (
 149                          (?:https?://)?                                       # http(s):// (optional)
 150                          (?:(?:(?:(?:\w+\.)?youtube(?:-nocookie)?\.com/|
 151                             tube\.majestyc\.net/|
 152                             youtube\.googleapis\.com/)                        # the various hostnames, with wildcard subdomains
 153                          (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
 154                          (?:                                                  # the various things that can precede the ID:
 155                              (?:(?:v|embed|e)/)                               # v/ or embed/ or e/
 156                              |(?:                                             # or the v= param in all its forms
 157                                  (?:(?:watch|movie)(?:_popup)?(?:\.php)?)?    # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
 158                                  (?:\?|\#!?)                                  # the params delimiter ? or # or #!
 159                                  (?:.*?&)?                                    # any other preceding param (like /?s=tuff&v=xxxx)
 160                                  v=
 161                              )
 162                          ))
 163                          |youtu\.be/                                          # just youtu.be/xxxx
 164                          )
 165                      )?                                                       # all until now is optional -> you can pass the naked ID
 166                      ([0-9A-Za-z_-]{11})                                      # here is it! the YouTube video ID
 167                      (?(1).+)?                                                # if we found the ID, everything can follow
 168                      $"""
 169     _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
 170     # Listed in order of quality
 171     _available_formats = ['38', '37', '46', '22', '45', '35', '44', '34', '18', '43', '6', '5', '36', '17', '13',
 172                           # Apple HTTP Live Streaming
 173                           '96', '95', '94', '93', '92', '132', '151',
 174                           # 3D
 175                           '85', '84', '102', '83', '101', '82', '100',
 176                           # Dash video
 177                           '138', '137', '248', '136', '247', '135', '246',
 178                           '245', '244', '134', '243', '133', '242', '160',
 179                           # Dash audio
 180                           '141', '172', '140', '171', '139',
 181                           ]
 182     _available_formats_prefer_free = ['38', '46', '37', '45', '22', '44', '35', '43', '34', '18', '6', '5', '36', '17', '13',
 183                                       # Apple HTTP Live Streaming
 184                                       '96', '95', '94', '93', '92', '132', '151',
 185                                       # 3D
 186                                       '85', '102', '84', '101', '83', '100', '82',
 187                                       # Dash video
 188                                       '138', '248', '137', '247', '136', '246', '245',
 189                                       '244', '135', '243', '134', '242', '133', '160',
 190                                       # Dash audio
 191                                       '172', '141', '171', '140', '139',
 192                                       ]
 193     _video_formats_map = {
 194         'flv': ['35', '34', '6', '5'],
 195         '3gp': ['36', '17', '13'],
 196         'mp4': ['38', '37', '22', '18'],
 197         'webm': ['46', '45', '44', '43'],
 198     }
 199     _video_extensions = {
 200         '13': '3gp',
 201         '17': '3gp',
 202         '18': 'mp4',
 203         '22': 'mp4',
 204         '36': '3gp',
 205         '37': 'mp4',
 206         '38': 'mp4',
 207         '43': 'webm',
 208         '44': 'webm',
 209         '45': 'webm',
 210         '46': 'webm',
 211
 212         # 3d videos
 213         '82': 'mp4',
 214         '83': 'mp4',
 215         '84': 'mp4',
 216         '85': 'mp4',
 217         '100': 'webm',
 218         '101': 'webm',
 219         '102': 'webm',
 220
 221         # Apple HTTP Live Streaming
 222         '92': 'mp4',
 223         '93': 'mp4',
 224         '94': 'mp4',
 225         '95': 'mp4',
 226         '96': 'mp4',
 227         '132': 'mp4',
 228         '151': 'mp4',
 229
 230         # Dash mp4
 231         '133': 'mp4',
 232         '134': 'mp4',
 233         '135': 'mp4',
 234         '136': 'mp4',
 235         '137': 'mp4',
 236         '138': 'mp4',
 237         '139': 'mp4',
 238         '140': 'mp4',
 239         '141': 'mp4',
 240         '160': 'mp4',
 241
 242         # Dash webm
 243         '171': 'webm',
 244         '172': 'webm',
 245         '242': 'webm',
 246         '243': 'webm',
 247         '244': 'webm',
 248         '245': 'webm',
 249         '246': 'webm',
 250         '247': 'webm',
 251         '248': 'webm',
 252     }
 253     _video_dimensions = {
 254         '5': '240x400',
 255         '6': '???',
 256         '13': '???',
 257         '17': '144x176',
 258         '18': '360x640',
 259         '22': '720x1280',
 260         '34': '360x640',
 261         '35': '480x854',
 262         '36': '240x320',
 263         '37': '1080x1920',
 264         '38': '3072x4096',
 265         '43': '360x640',
 266         '44': '480x854',
 267         '45': '720x1280',
 268         '46': '1080x1920',
 269         '82': '360p',
 270         '83': '480p',
 271         '84': '720p',
 272         '85': '1080p',
 273         '92': '240p',
 274         '93': '360p',
 275         '94': '480p',
 276         '95': '720p',
 277         '96': '1080p',
 278         '100': '360p',
 279         '101': '480p',
 280         '102': '720p',
 281         '132': '240p',
 282         '151': '72p',
 283         '133': '240p',
 284         '134': '360p',
 285         '135': '480p',
 286         '136': '720p',
 287         '137': '1080p',
 288         '138': '>1080p',
 289         '139': '48k',
 290         '140': '128k',
 291         '141': '256k',
 292         '160': '192p',
 293         '171': '128k',
 294         '172': '256k',
 295         '242': '240p',
 296         '243': '360p',
 297         '244': '480p',
 298         '245': '480p',
 299         '246': '480p',
 300         '247': '720p',
 301         '248': '1080p',
 302     }
 303     _special_itags = {
 304         '82': '3D',
 305         '83': '3D',
 306         '84': '3D',
 307         '85': '3D',
 308         '100': '3D',
 309         '101': '3D',
 310         '102': '3D',
 311         '133': 'DASH Video',
 312         '134': 'DASH Video',
 313         '135': 'DASH Video',
 314         '136': 'DASH Video',
 315         '137': 'DASH Video',
 316         '138': 'DASH Video',
 317         '139': 'DASH Audio',
 318         '140': 'DASH Audio',
 319         '141': 'DASH Audio',
 320         '160': 'DASH Video',
 321         '171': 'DASH Audio',
 322         '172': 'DASH Audio',
 323         '242': 'DASH Video',
 324         '243': 'DASH Video',
 325         '244': 'DASH Video',
 326         '245': 'DASH Video',
 327         '246': 'DASH Video',
 328         '247': 'DASH Video',
 329         '248': 'DASH Video',
 330     }
 331
 332     IE_NAME = u'youtube'
 333     _TESTS = [
 334         {
 335             u"url":  u"http://www.youtube.com/watch?v=BaW_jenozKc",
 336             u"file":  u"BaW_jenozKc.mp4",
 337             u"info_dict": {
 338                 u"title": u"youtube-dl test video \"'/\\ä↭𝕐",
 339                 u"uploader": u"Philipp Hagemeister",
 340                 u"uploader_id": u"phihag",
 341                 u"upload_date": u"20121002",
 342                 u"description": u"test chars:  \"'/\\ä↭𝕐\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de ."
 343             }
 344         },
 345         {
 346             u"url":  u"http://www.youtube.com/watch?v=1ltcDfZMA3U",
 347             u"file":  u"1ltcDfZMA3U.flv",
 348             u"note": u"Test VEVO video (#897)",
 349             u"info_dict": {
 350                 u"upload_date": u"20070518",
 351                 u"title": u"Maps - It Will Find You",
 352                 u"description": u"Music video by Maps performing It Will Find You.",
 353                 u"uploader": u"MuteUSA",
 354                 u"uploader_id": u"MuteUSA"
 355             }
 356         },
 357         {
 358             u"url":  u"http://www.youtube.com/watch?v=UxxajLWwzqY",
 359             u"file":  u"UxxajLWwzqY.mp4",
 360             u"note": u"Test generic use_cipher_signature video (#897)",
 361             u"info_dict": {
 362                 u"upload_date": u"20120506",
 363                 u"title": u"Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]",
 364                 u"description": u"md5:5b292926389560516e384ac437c0ec07",
 365                 u"uploader": u"Icona Pop",
 366                 u"uploader_id": u"IconaPop"
 367             }
 368         },
 369         {
 370             u"url":  u"https://www.youtube.com/watch?v=07FYdnEawAQ",
 371             u"file":  u"07FYdnEawAQ.mp4",
 372             u"note": u"Test VEVO video with age protection (#956)",
 373             u"info_dict": {
 374                 u"upload_date": u"20130703",
 375                 u"title": u"Justin Timberlake - Tunnel Vision (Explicit)",
 376                 u"description": u"md5:64249768eec3bc4276236606ea996373",
 377                 u"uploader": u"justintimberlakeVEVO",
 378                 u"uploader_id": u"justintimberlakeVEVO"
 379             }
 380         },
 381     ]
 382
 383
 384     @classmethod
 385     def suitable(cls, url):
 386         """Receives a URL and returns True if suitable for this IE."""
 387         if YoutubePlaylistIE.suitable(url): return False
 388         return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
 389
 390     def __init__(self, *args, **kwargs):
 391         super(YoutubeIE, self).__init__(*args, **kwargs)
 392         self._player_cache = {}
 393
 394     def report_video_webpage_download(self, video_id):
 395         """Report attempt to download video webpage."""
 396         self.to_screen(u'%s: Downloading video webpage' % video_id)
 397
 398     def report_video_info_webpage_download(self, video_id):
 399         """Report attempt to download video info webpage."""
 400         self.to_screen(u'%s: Downloading video info webpage' % video_id)
 401
 402     def report_information_extraction(self, video_id):
 403         """Report attempt to extract video information."""
 404         self.to_screen(u'%s: Extracting video information' % video_id)
 405
 406     def report_unavailable_format(self, video_id, format):
 407         """Report extracted video URL."""
 408         self.to_screen(u'%s: Format %s not available' % (video_id, format))
 409
 410     def report_rtmp_download(self):
 411         """Indicate the download will use the RTMP protocol."""
 412         self.to_screen(u'RTMP download detected')
 413
 414     def _extract_signature_function(self, video_id, player_url, slen):
 415         id_m = re.match(r'.*-(?P<id>[a-zA-Z0-9_-]+)\.(?P<ext>[a-z]+)$',
 416                         player_url)
 417         player_type = id_m.group('ext')
 418         player_id = id_m.group('id')
 419
 420         # Read from filesystem cache
 421         func_id = '%s_%s_%d' % (player_type, player_id, slen)
 422         assert os.path.basename(func_id) == func_id
 423         xdg_cache_home = os.environ.get('XDG_CACHE_HOME')
 424         if xdg_cache_home:
 425             userCacheDir = os.path.join(xdg_cache_home, 'youtube-dl')
 426         else:
 427             userCacheDir = os.path.join(os.path.expanduser('~'), '.cache', 'youtube-dl')
 428         cache_dir = self._downloader.params.get('cachedir', userCacheDir)
 429
 430         cache_enabled = cache_dir is not None
 431         if cache_enabled:
 432             cache_fn = os.path.join(os.path.expanduser(cache_dir),
 433                                     u'youtube-sigfuncs',
 434                                     func_id + '.json')
 435             try:
 436                 with io.open(cache_fn, 'r', encoding='utf-8') as cachef:
 437                     cache_spec = json.load(cachef)
 438                 return lambda s: u''.join(s[i] for i in cache_spec)
 439             except IOError:
 440                 pass  # No cache available
 441
 442         if player_type == 'js':
 443             code = self._download_webpage(
 444                 player_url, video_id,
 445                 note=u'Downloading %s player %s' % (player_type, player_id),
 446                 errnote=u'Download of %s failed' % player_url)
 447             res = self._parse_sig_js(code)
 448         elif player_type == 'swf':
 449             urlh = self._request_webpage(
 450                 player_url, video_id,
 451                 note=u'Downloading %s player %s' % (player_type, player_id),
 452                 errnote=u'Download of %s failed' % player_url)
 453             code = urlh.read()
 454             res = self._parse_sig_swf(code)
 455         else:
 456             assert False, 'Invalid player type %r' % player_type
 457
 458         if cache_enabled:
 459             try:
 460                 test_string = u''.join(map(compat_chr, range(slen)))
 461                 cache_res = res(test_string)
 462                 cache_spec = [ord(c) for c in cache_res]
 463                 try:
 464                     os.makedirs(os.path.dirname(cache_fn))
 465                 except OSError as ose:
 466                     if ose.errno != errno.EEXIST:
 467                         raise
 468                 write_json_file(cache_spec, cache_fn)
 469             except Exception:
 470                 tb = traceback.format_exc()
 471                 self._downloader.report_warning(
 472                     u'Writing cache to %r failed: %s' % (cache_fn, tb))
 473
 474         return res
 475
 476     def _print_sig_code(self, func, slen):
 477         def gen_sig_code(idxs):
 478             def _genslice(start, end, step):
 479                 starts = u'' if start == 0 else str(start)
 480                 ends = (u':%d' % (end+step)) if end + step >= 0 else u':'
 481                 steps = u'' if step == 1 else (u':%d' % step)
 482                 return u's[%s%s%s]' % (starts, ends, steps)
 483
 484             step = None
 485             start = '(Never used)'  # Quelch pyflakes warnings - start will be
 486                                     # set as soon as step is set
 487             for i, prev in zip(idxs[1:], idxs[:-1]):
 488                 if step is not None:
 489                     if i - prev == step:
 490                         continue
 491                     yield _genslice(start, prev, step)
 492                     step = None
 493                     continue
 494                 if i - prev in [-1, 1]:
 495                     step = i - prev
 496                     start = prev
 497                     continue
 498                 else:
 499                     yield u's[%d]' % prev
 500             if step is None:
 501                 yield u's[%d]' % i
 502             else:
 503                 yield _genslice(start, i, step)
 504
 505         test_string = u''.join(map(compat_chr, range(slen)))
 506         cache_res = func(test_string)
 507         cache_spec = [ord(c) for c in cache_res]
 508         expr_code = u' + '.join(gen_sig_code(cache_spec))
 509         code = u'if len(s) == %d:\n    return %s\n' % (slen, expr_code)
 510         self.to_screen(u'Extracted signature function:\n' + code)
 511
 512     def _parse_sig_js(self, jscode):
 513         funcname = self._search_regex(
 514             r'signature=([a-zA-Z]+)', jscode,
 515             u'Initial JS player signature function name')
 516
 517         functions = {}
 518
 519         def argidx(varname):
 520             return string.lowercase.index(varname)
 521
 522         def interpret_statement(stmt, local_vars, allow_recursion=20):
 523             if allow_recursion < 0:
 524                 raise ExtractorError(u'Recursion limit reached')
 525
 526             if stmt.startswith(u'var '):
 527                 stmt = stmt[len(u'var '):]
 528             ass_m = re.match(r'^(?P<out>[a-z]+)(?:\[(?P<index>[^\]]+)\])?' +
 529                              r'=(?P<expr>.*)$', stmt)
 530             if ass_m:
 531                 if ass_m.groupdict().get('index'):
 532                     def assign(val):
 533                         lvar = local_vars[ass_m.group('out')]
 534                         idx = interpret_expression(ass_m.group('index'),
 535                                                    local_vars, allow_recursion)
 536                         assert isinstance(idx, int)
 537                         lvar[idx] = val
 538                         return val
 539                     expr = ass_m.group('expr')
 540                 else:
 541                     def assign(val):
 542                         local_vars[ass_m.group('out')] = val
 543                         return val
 544                     expr = ass_m.group('expr')
 545             elif stmt.startswith(u'return '):
 546                 assign = lambda v: v
 547                 expr = stmt[len(u'return '):]
 548             else:
 549                 raise ExtractorError(
 550                     u'Cannot determine left side of statement in %r' % stmt)
 551
 552             v = interpret_expression(expr, local_vars, allow_recursion)
 553             return assign(v)
 554
 555         def interpret_expression(expr, local_vars, allow_recursion):
 556             if expr.isdigit():
 557                 return int(expr)
 558
 559             if expr.isalpha():
 560                 return local_vars[expr]
 561
 562             m = re.match(r'^(?P<in>[a-z]+)\.(?P<member>.*)$', expr)
 563             if m:
 564                 member = m.group('member')
 565                 val = local_vars[m.group('in')]
 566                 if member == 'split("")':
 567                     return list(val)
 568                 if member == 'join("")':
 569                     return u''.join(val)
 570                 if member == 'length':
 571                     return len(val)
 572                 if member == 'reverse()':
 573                     return val[::-1]
 574                 slice_m = re.match(r'slice\((?P<idx>.*)\)', member)
 575                 if slice_m:
 576                     idx = interpret_expression(
 577                         slice_m.group('idx'), local_vars, allow_recursion-1)
 578                     return val[idx:]
 579
 580             m = re.match(
 581                 r'^(?P<in>[a-z]+)\[(?P<idx>.+)\]$', expr)
 582             if m:
 583                 val = local_vars[m.group('in')]
 584                 idx = interpret_expression(m.group('idx'), local_vars,
 585                                            allow_recursion-1)
 586                 return val[idx]
 587
 588             m = re.match(r'^(?P<a>.+?)(?P<op>[%])(?P<b>.+?)$', expr)
 589             if m:
 590                 a = interpret_expression(m.group('a'),
 591                                          local_vars, allow_recursion)
 592                 b = interpret_expression(m.group('b'),
 593                                          local_vars, allow_recursion)
 594                 return a % b
 595
 596             m = re.match(
 597                 r'^(?P<func>[a-zA-Z]+)\((?P<args>[a-z0-9,]+)\)$', expr)
 598             if m:
 599                 fname = m.group('func')
 600                 if fname not in functions:
 601                     functions[fname] = extract_function(fname)
 602                 argvals = [int(v) if v.isdigit() else local_vars[v]
 603                            for v in m.group('args').split(',')]
 604                 return functions[fname](argvals)
 605             raise ExtractorError(u'Unsupported JS expression %r' % expr)
 606
 607         def extract_function(funcname):
 608             func_m = re.search(
 609                 r'function ' + re.escape(funcname) +
 610                 r'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}',
 611                 jscode)
 612             argnames = func_m.group('args').split(',')
 613
 614             def resf(args):
 615                 local_vars = dict(zip(argnames, args))
 616                 for stmt in func_m.group('code').split(';'):
 617                     res = interpret_statement(stmt, local_vars)
 618                 return res
 619             return resf
 620
 621         initial_function = extract_function(funcname)
 622         return lambda s: initial_function([s])
 623
 624     def _parse_sig_swf(self, file_contents):
 625         if file_contents[1:3] != b'WS':
 626             raise ExtractorError(
 627                 u'Not an SWF file; header is %r' % file_contents[:3])
 628         if file_contents[:1] == b'C':
 629             content = zlib.decompress(file_contents[8:])
 630         else:
 631             raise NotImplementedError(u'Unsupported compression format %r' %
 632                                       file_contents[:1])
 633
 634         def extract_tags(content):
 635             pos = 0
 636             while pos < len(content):
 637                 header16 = struct.unpack('<H', content[pos:pos+2])[0]
 638                 pos += 2
 639                 tag_code = header16 >> 6
 640                 tag_len = header16 & 0x3f
 641                 if tag_len == 0x3f:
 642                     tag_len = struct.unpack('<I', content[pos:pos+4])[0]
 643                     pos += 4
 644                 assert pos+tag_len <= len(content)
 645                 yield (tag_code, content[pos:pos+tag_len])
 646                 pos += tag_len
 647
 648         code_tag = next(tag
 649                         for tag_code, tag in extract_tags(content)
 650                         if tag_code == 82)
 651         p = code_tag.index(b'\0', 4) + 1
 652         code_reader = io.BytesIO(code_tag[p:])
 653
 654         # Parse ABC (AVM2 ByteCode)
 655         def read_int(reader=None):
 656             if reader is None:
 657                 reader = code_reader
 658             res = 0
 659             shift = 0
 660             for _ in range(5):
 661                 buf = reader.read(1)
 662                 assert len(buf) == 1
 663                 b = struct.unpack('<B', buf)[0]
 664                 res = res | ((b & 0x7f) << shift)
 665                 if b & 0x80 == 0:
 666                     break
 667                 shift += 7
 668             return res
 669
 670         def u30(reader=None):
 671             res = read_int(reader)
 672             assert res & 0xf0000000 == 0
 673             return res
 674         u32 = read_int
 675
 676         def s32(reader=None):
 677             v = read_int(reader)
 678             if v & 0x80000000 != 0:
 679                 v = - ((v ^ 0xffffffff) + 1)
 680             return v
 681
 682         def read_string(reader=None):
 683             if reader is None:
 684                 reader = code_reader
 685             slen = u30(reader)
 686             resb = reader.read(slen)
 687             assert len(resb) == slen
 688             return resb.decode('utf-8')
 689
 690         def read_bytes(count, reader=None):
 691             if reader is None:
 692                 reader = code_reader
 693             resb = reader.read(count)
 694             assert len(resb) == count
 695             return resb
 696
 697         def read_byte(reader=None):
 698             resb = read_bytes(1, reader=reader)
 699             res = struct.unpack('<B', resb)[0]
 700             return res
 701
 702         # minor_version + major_version
 703         read_bytes(2 + 2)
 704
 705         # Constant pool
 706         int_count = u30()
 707         for _c in range(1, int_count):
 708             s32()
 709         uint_count = u30()
 710         for _c in range(1, uint_count):
 711             u32()
 712         double_count = u30()
 713         read_bytes((double_count-1) * 8)
 714         string_count = u30()
 715         constant_strings = [u'']
 716         for _c in range(1, string_count):
 717             s = read_string()
 718             constant_strings.append(s)
 719         namespace_count = u30()
 720         for _c in range(1, namespace_count):
 721             read_bytes(1)  # kind
 722             u30()  # name
 723         ns_set_count = u30()
 724         for _c in range(1, ns_set_count):
 725             count = u30()
 726             for _c2 in range(count):
 727                 u30()
 728         multiname_count = u30()
 729         MULTINAME_SIZES = {
 730             0x07: 2,  # QName
 731             0x0d: 2,  # QNameA
 732             0x0f: 1,  # RTQName
 733             0x10: 1,  # RTQNameA
 734             0x11: 0,  # RTQNameL
 735             0x12: 0,  # RTQNameLA
 736             0x09: 2,  # Multiname
 737             0x0e: 2,  # MultinameA
 738             0x1b: 1,  # MultinameL
 739             0x1c: 1,  # MultinameLA
 740         }
 741         multinames = [u'']
 742         for _c in range(1, multiname_count):
 743             kind = u30()
 744             assert kind in MULTINAME_SIZES, u'Invalid multiname kind %r' % kind
 745             if kind == 0x07:
 746                 u30()  # namespace_idx
 747                 name_idx = u30()
 748                 multinames.append(constant_strings[name_idx])
 749             else:
 750                 multinames.append('[MULTINAME kind: %d]' % kind)
 751                 for _c2 in range(MULTINAME_SIZES[kind]):
 752                     u30()
 753
 754         # Methods
 755         method_count = u30()
 756         MethodInfo = collections.namedtuple(
 757             'MethodInfo',
 758             ['NEED_ARGUMENTS', 'NEED_REST'])
 759         method_infos = []
 760         for method_id in range(method_count):
 761             param_count = u30()
 762             u30()  # return type
 763             for _ in range(param_count):
 764                 u30()  # param type
 765             u30()  # name index (always 0 for youtube)
 766             flags = read_byte()
 767             if flags & 0x08 != 0:
 768                 # Options present
 769                 option_count = u30()
 770                 for c in range(option_count):
 771                     u30()  # val
 772                     read_bytes(1)  # kind
 773             if flags & 0x80 != 0:
 774                 # Param names present
 775                 for _ in range(param_count):
 776                     u30()  # param name
 777             mi = MethodInfo(flags & 0x01 != 0, flags & 0x04 != 0)
 778             method_infos.append(mi)
 779
 780         # Metadata
 781         metadata_count = u30()
 782         for _c in range(metadata_count):
 783             u30()  # name
 784             item_count = u30()
 785             for _c2 in range(item_count):
 786                 u30()  # key
 787                 u30()  # value
 788
 789         def parse_traits_info():
 790             trait_name_idx = u30()
 791             kind_full = read_byte()
 792             kind = kind_full & 0x0f
 793             attrs = kind_full >> 4
 794             methods = {}
 795             if kind in [0x00, 0x06]:  # Slot or Const
 796                 u30()  # Slot id
 797                 u30()  # type_name_idx
 798                 vindex = u30()
 799                 if vindex != 0:
 800                     read_byte()  # vkind
 801             elif kind in [0x01, 0x02, 0x03]:  # Method / Getter / Setter
 802                 u30()  # disp_id
 803                 method_idx = u30()
 804                 methods[multinames[trait_name_idx]] = method_idx
 805             elif kind == 0x04:  # Class
 806                 u30()  # slot_id
 807                 u30()  # classi
 808             elif kind == 0x05:  # Function
 809                 u30()  # slot_id
 810                 function_idx = u30()
 811                 methods[function_idx] = multinames[trait_name_idx]
 812             else:
 813                 raise ExtractorError(u'Unsupported trait kind %d' % kind)
 814
 815             if attrs & 0x4 != 0:  # Metadata present
 816                 metadata_count = u30()
 817                 for _c3 in range(metadata_count):
 818                     u30()  # metadata index
 819
 820             return methods
 821
 822         # Classes
 823         TARGET_CLASSNAME = u'SignatureDecipher'
 824         searched_idx = multinames.index(TARGET_CLASSNAME)
 825         searched_class_id = None
 826         class_count = u30()
 827         for class_id in range(class_count):
 828             name_idx = u30()
 829             if name_idx == searched_idx:
 830                 # We found the class we're looking for!
 831                 searched_class_id = class_id
 832             u30()  # super_name idx
 833             flags = read_byte()
 834             if flags & 0x08 != 0:  # Protected namespace is present
 835                 u30()  # protected_ns_idx
 836             intrf_count = u30()
 837             for _c2 in range(intrf_count):
 838                 u30()
 839             u30()  # iinit
 840             trait_count = u30()
 841             for _c2 in range(trait_count):
 842                 parse_traits_info()
 843
 844         if searched_class_id is None:
 845             raise ExtractorError(u'Target class %r not found' %
 846                                  TARGET_CLASSNAME)
 847
 848         method_names = {}
 849         method_idxs = {}
 850         for class_id in range(class_count):
 851             u30()  # cinit
 852             trait_count = u30()
 853             for _c2 in range(trait_count):
 854                 trait_methods = parse_traits_info()
 855                 if class_id == searched_class_id:
 856                     method_names.update(trait_methods.items())
 857                     method_idxs.update(dict(
 858                         (idx, name)
 859                         for name, idx in trait_methods.items()))
 860
 861         # Scripts
 862         script_count = u30()
 863         for _c in range(script_count):
 864             u30()  # init
 865             trait_count = u30()
 866             for _c2 in range(trait_count):
 867                 parse_traits_info()
 868
 869         # Method bodies
 870         method_body_count = u30()
 871         Method = collections.namedtuple('Method', ['code', 'local_count'])
 872         methods = {}
 873         for _c in range(method_body_count):
 874             method_idx = u30()
 875             u30()  # max_stack
 876             local_count = u30()
 877             u30()  # init_scope_depth
 878             u30()  # max_scope_depth
 879             code_length = u30()
 880             code = read_bytes(code_length)
 881             if method_idx in method_idxs:
 882                 m = Method(code, local_count)
 883                 methods[method_idxs[method_idx]] = m
 884             exception_count = u30()
 885             for _c2 in range(exception_count):
 886                 u30()  # from
 887                 u30()  # to
 888                 u30()  # target
 889                 u30()  # exc_type
 890                 u30()  # var_name
 891             trait_count = u30()
 892             for _c2 in range(trait_count):
 893                 parse_traits_info()
 894
 895         assert p + code_reader.tell() == len(code_tag)
 896         assert len(methods) == len(method_idxs)
 897
 898         method_pyfunctions = {}
 899
 900         def extract_function(func_name):
 901             if func_name in method_pyfunctions:
 902                 return method_pyfunctions[func_name]
 903             if func_name not in methods:
 904                 raise ExtractorError(u'Cannot find function %r' % func_name)
 905             m = methods[func_name]
 906
 907             def resfunc(args):
 908                 registers = ['(this)'] + list(args) + [None] * m.local_count
 909                 stack = []
 910                 coder = io.BytesIO(m.code)
 911                 while True:
 912                     opcode = struct.unpack('!B', coder.read(1))[0]
 913                     if opcode == 36:  # pushbyte
 914                         v = struct.unpack('!B', coder.read(1))[0]
 915                         stack.append(v)
 916                     elif opcode == 44:  # pushstring
 917                         idx = u30(coder)
 918                         stack.append(constant_strings[idx])
 919                     elif opcode == 48:  # pushscope
 920                         # We don't implement the scope register, so we'll just
 921                         # ignore the popped value
 922                         stack.pop()
 923                     elif opcode == 70:  # callproperty
 924                         index = u30(coder)
 925                         mname = multinames[index]
 926                         arg_count = u30(coder)
 927                         args = list(reversed(
 928                             [stack.pop() for _ in range(arg_count)]))
 929                         obj = stack.pop()
 930                         if mname == u'split':
 931                             assert len(args) == 1
 932                             assert isinstance(args[0], compat_str)
 933                             assert isinstance(obj, compat_str)
 934                             if args[0] == u'':
 935                                 res = list(obj)
 936                             else:
 937                                 res = obj.split(args[0])
 938                             stack.append(res)
 939                         elif mname == u'slice':
 940                             assert len(args) == 1
 941                             assert isinstance(args[0], int)
 942                             assert isinstance(obj, list)
 943                             res = obj[args[0]:]
 944                             stack.append(res)
 945                         elif mname == u'join':
 946                             assert len(args) == 1
 947                             assert isinstance(args[0], compat_str)
 948                             assert isinstance(obj, list)
 949                             res = args[0].join(obj)
 950                             stack.append(res)
 951                         elif mname in method_pyfunctions:
 952                             stack.append(method_pyfunctions[mname](args))
 953                         else:
 954                             raise NotImplementedError(
 955                                 u'Unsupported property %r on %r'
 956                                 % (mname, obj))
 957                     elif opcode == 72:  # returnvalue
 958                         res = stack.pop()
 959                         return res
 960                     elif opcode == 79:  # callpropvoid
 961                         index = u30(coder)
 962                         mname = multinames[index]
 963                         arg_count = u30(coder)
 964                         args = list(reversed(
 965                             [stack.pop() for _ in range(arg_count)]))
 966                         obj = stack.pop()
 967                         if mname == u'reverse':
 968                             assert isinstance(obj, list)
 969                             obj.reverse()
 970                         else:
 971                             raise NotImplementedError(
 972                                 u'Unsupported (void) property %r on %r'
 973                                 % (mname, obj))
 974                     elif opcode == 93:  # findpropstrict
 975                         index = u30(coder)
 976                         mname = multinames[index]
 977                         res = extract_function(mname)
 978                         stack.append(res)
 979                     elif opcode == 97:  # setproperty
 980                         index = u30(coder)
 981                         value = stack.pop()
 982                         idx = stack.pop()
 983                         obj = stack.pop()
 984                         assert isinstance(obj, list)
 985                         assert isinstance(idx, int)
 986                         obj[idx] = value
 987                     elif opcode == 98:  # getlocal
 988                         index = u30(coder)
 989                         stack.append(registers[index])
 990                     elif opcode == 99:  # setlocal
 991                         index = u30(coder)
 992                         value = stack.pop()
 993                         registers[index] = value
 994                     elif opcode == 102:  # getproperty
 995                         index = u30(coder)
 996                         pname = multinames[index]
 997                         if pname == u'length':
 998                             obj = stack.pop()
 999                             assert isinstance(obj, list)
1000                             stack.append(len(obj))
1001                         else:  # Assume attribute access
1002                             idx = stack.pop()
1003                             assert isinstance(idx, int)
1004                             obj = stack.pop()
1005                             assert isinstance(obj, list)
1006                             stack.append(obj[idx])
1007                     elif opcode == 128:  # coerce
1008                         u30(coder)
1009                     elif opcode == 133:  # coerce_s
1010                         assert isinstance(stack[-1], (type(None), compat_str))
1011                     elif opcode == 164:  # modulo
1012                         value2 = stack.pop()
1013                         value1 = stack.pop()
1014                         res = value1 % value2
1015                         stack.append(res)
1016                     elif opcode == 208:  # getlocal_0
1017                         stack.append(registers[0])
1018                     elif opcode == 209:  # getlocal_1
1019                         stack.append(registers[1])
1020                     elif opcode == 210:  # getlocal_2
1021                         stack.append(registers[2])
1022                     elif opcode == 211:  # getlocal_3
1023                         stack.append(registers[3])
1024                     elif opcode == 214:  # setlocal_2
1025                         registers[2] = stack.pop()
1026                     elif opcode == 215:  # setlocal_3
1027                         registers[3] = stack.pop()
1028                     else:
1029                         raise NotImplementedError(
1030                             u'Unsupported opcode %d' % opcode)
1031
1032             method_pyfunctions[func_name] = resfunc
1033             return resfunc
1034
1035         initial_function = extract_function(u'decipher')
1036         return lambda s: initial_function([s])
1037
1038     def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
1039         """Turn the encrypted s field into a working signature"""
1040
1041         if player_url is not None:
1042             try:
1043                 if player_url not in self._player_cache:
1044                     func = self._extract_signature_function(
1045                         video_id, player_url, len(s)
1046                     )
1047                     self._player_cache[player_url] = func
1048                 func = self._player_cache[player_url]
1049                 if self._downloader.params.get('youtube_print_sig_code'):
1050                     self._print_sig_code(func, len(s))
1051                 return func(s)
1052             except Exception:
1053                 tb = traceback.format_exc()
1054                 self._downloader.report_warning(
1055                     u'Automatic signature extraction failed: ' + tb)
1056
1057             self._downloader.report_warning(
1058                 u'Warning: Falling back to static signature algorithm')
1059
1060         return self._static_decrypt_signature(
1061             s, video_id, player_url, age_gate)
1062
1063     def _static_decrypt_signature(self, s, video_id, player_url, age_gate):
1064         if age_gate:
1065             # The videos with age protection use another player, so the
1066             # algorithms can be different.
1067             if len(s) == 86:
1068                 return s[2:63] + s[82] + s[64:82] + s[63]
1069
1070         if len(s) == 93:
1071             return s[86:29:-1] + s[88] + s[28:5:-1]
1072         elif len(s) == 92:
1073             return s[25] + s[3:25] + s[0] + s[26:42] + s[79] + s[43:79] + s[91] + s[80:83]
1074         elif len(s) == 91:
1075             return s[84:27:-1] + s[86] + s[26:5:-1]
1076         elif len(s) == 90:
1077             return s[25] + s[3:25] + s[2] + s[26:40] + s[77] + s[41:77] + s[89] + s[78:81]
1078         elif len(s) == 89:
1079             return s[84:78:-1] + s[87] + s[77:60:-1] + s[0] + s[59:3:-1]
1080         elif len(s) == 88:
1081             return s[7:28] + s[87] + s[29:45] + s[55] + s[46:55] + s[2] + s[56:87] + s[28]
1082         elif len(s) == 87:
1083             return s[6:27] + s[4] + s[28:39] + s[27] + s[40:59] + s[2] + s[60:]
1084         elif len(s) == 86:
1085             return s[80:72:-1] + s[16] + s[71:39:-1] + s[72] + s[38:16:-1] + s[82] + s[15::-1]
1086         elif len(s) == 85:
1087             return s[3:11] + s[0] + s[12:55] + s[84] + s[56:84]
1088         elif len(s) == 84:
1089             return s[78:70:-1] + s[14] + s[69:37:-1] + s[70] + s[36:14:-1] + s[80] + s[:14][::-1]
1090         elif len(s) == 83:
1091             return s[80:63:-1] + s[0] + s[62:0:-1] + s[63]
1092         elif len(s) == 82:
1093             return s[80:73:-1] + s[81] + s[72:54:-1] + s[2] + s[53:43:-1] + s[0] + s[42:2:-1] + s[43] + s[1] + s[54]
1094         elif len(s) == 81:
1095             return s[56] + s[79:56:-1] + s[41] + s[55:41:-1] + s[80] + s[40:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
1096         elif len(s) == 80:
1097             return s[1:19] + s[0] + s[20:68] + s[19] + s[69:80]
1098         elif len(s) == 79:
1099             return s[54] + s[77:54:-1] + s[39] + s[53:39:-1] + s[78] + s[38:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
1100
1101         else:
1102             raise ExtractorError(u'Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s)))
1103
1104     def _get_available_subtitles(self, video_id):
1105         try:
1106             sub_list = self._download_webpage(
1107                 'http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
1108                 video_id, note=False)
1109         except ExtractorError as err:
1110             self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err))
1111             return {}
1112         lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list)
1113
1114         sub_lang_list = {}
1115         for l in lang_list:
1116             lang = l[1]
1117             params = compat_urllib_parse.urlencode({
1118                 'lang': lang,
1119                 'v': video_id,
1120                 'fmt': self._downloader.params.get('subtitlesformat'),
1121             })
1122             url = u'http://www.youtube.com/api/timedtext?' + params
1123             sub_lang_list[lang] = url
1124         if not sub_lang_list:
1125             self._downloader.report_warning(u'video doesn\'t have subtitles')
1126             return {}
1127         return sub_lang_list
1128
1129     def _get_available_automatic_caption(self, video_id, webpage):
1130         """We need the webpage for getting the captions url, pass it as an
1131            argument to speed up the process."""
1132         sub_format = self._downloader.params.get('subtitlesformat')
1133         self.to_screen(u'%s: Looking for automatic captions' % video_id)
1134         mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
1135         err_msg = u'Couldn\'t find automatic captions for %s' % video_id
1136         if mobj is None:
1137             self._downloader.report_warning(err_msg)
1138             return {}
1139         player_config = json.loads(mobj.group(1))
1140         try:
1141             args = player_config[u'args']
1142             caption_url = args[u'ttsurl']
1143             timestamp = args[u'timestamp']
1144             # We get the available subtitles
1145             list_params = compat_urllib_parse.urlencode({
1146                 'type': 'list',
1147                 'tlangs': 1,
1148                 'asrs': 1,
1149             })
1150             list_url = caption_url + '&' + list_params
1151             list_page = self._download_webpage(list_url, video_id)
1152             caption_list = xml.etree.ElementTree.fromstring(list_page.encode('utf-8'))
1153             original_lang_node = caption_list.find('track')
1154             if original_lang_node.attrib.get('kind') != 'asr' :
1155                 self._downloader.report_warning(u'Video doesn\'t have automatic captions')
1156                 return {}
1157             original_lang = original_lang_node.attrib['lang_code']
1158
1159             sub_lang_list = {}
1160             for lang_node in caption_list.findall('target'):
1161                 sub_lang = lang_node.attrib['lang_code']
1162                 params = compat_urllib_parse.urlencode({
1163                     'lang': original_lang,
1164                     'tlang': sub_lang,
1165                     'fmt': sub_format,
1166                     'ts': timestamp,
1167                     'kind': 'asr',
1168                 })
1169                 sub_lang_list[sub_lang] = caption_url + '&' + params
1170             return sub_lang_list
1171         # An extractor error can be raise by the download process if there are
1172         # no automatic captions but there are subtitles
1173         except (KeyError, ExtractorError):
1174             self._downloader.report_warning(err_msg)
1175             return {}
1176
1177     def _print_formats(self, formats):
1178         print('Available formats:')
1179         for x in formats:
1180             print('%s\t:\t%s\t[%s]%s' %(x, self._video_extensions.get(x, 'flv'),
1181                                         self._video_dimensions.get(x, '???'),
1182                                         ' ('+self._special_itags[x]+')' if x in self._special_itags else ''))
1183
1184     def _extract_id(self, url):
1185         mobj = re.match(self._VALID_URL, url, re.VERBOSE)
1186         if mobj is None:
1187             raise ExtractorError(u'Invalid URL: %s' % url)
1188         video_id = mobj.group(2)
1189         return video_id
1190
1191     def _get_video_url_list(self, url_map):
1192         """
1193         Transform a dictionary in the format {itag:url} to a list of (itag, url)
1194         with the requested formats.
1195         """
1196         req_format = self._downloader.params.get('format', None)
1197         format_limit = self._downloader.params.get('format_limit', None)
1198         available_formats = self._available_formats_prefer_free if self._downloader.params.get('prefer_free_formats', False) else self._available_formats
1199         if format_limit is not None and format_limit in available_formats:
1200             format_list = available_formats[available_formats.index(format_limit):]
1201         else:
1202             format_list = available_formats
1203         existing_formats = [x for x in format_list if x in url_map]
1204         if len(existing_formats) == 0:
1205             raise ExtractorError(u'no known formats available for video')
1206         if self._downloader.params.get('listformats', None):
1207             self._print_formats(existing_formats)
1208             return
1209         if req_format is None or req_format == 'best':
1210             video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
1211         elif req_format == 'worst':
1212             video_url_list = [(existing_formats[-1], url_map[existing_formats[-1]])] # worst quality
1213         elif req_format in ('-1', 'all'):
1214             video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
1215         else:
1216             # Specific formats. We pick the first in a slash-delimeted sequence.
1217             # Format can be specified as itag or 'mp4' or 'flv' etc. We pick the highest quality
1218             # available in the specified format. For example,
1219             # if '1/2/3/4' is requested and '2' and '4' are available, we pick '2'.
1220             # if '1/mp4/3/4' is requested and '1' and '5' (is a mp4) are available, we pick '1'.
1221             # if '1/mp4/3/4' is requested and '4' and '5' (is a mp4) are available, we pick '5'.
1222             req_formats = req_format.split('/')
1223             video_url_list = None
1224             for rf in req_formats:
1225                 if rf in url_map:
1226                     video_url_list = [(rf, url_map[rf])]
1227                     break
1228                 if rf in self._video_formats_map:
1229                     for srf in self._video_formats_map[rf]:
1230                         if srf in url_map:
1231                             video_url_list = [(srf, url_map[srf])]
1232                             break
1233                     else:
1234                         continue
1235                     break
1236             if video_url_list is None:
1237                 raise ExtractorError(u'requested format not available')
1238         return video_url_list
1239
1240     def _extract_from_m3u8(self, manifest_url, video_id):
1241         url_map = {}
1242         def _get_urls(_manifest):
1243             lines = _manifest.split('\n')
1244             urls = filter(lambda l: l and not l.startswith('#'),
1245                             lines)
1246             return urls
1247         manifest = self._download_webpage(manifest_url, video_id, u'Downloading formats manifest')
1248         formats_urls = _get_urls(manifest)
1249         for format_url in formats_urls:
1250             itag = self._search_regex(r'itag/(\d+?)/', format_url, 'itag')
1251             url_map[itag] = format_url
1252         return url_map
1253
1254     def _real_extract(self, url):
1255         if re.match(r'(?:https?://)?[^/]+/watch\?feature=[a-z_]+$', url):
1256             self._downloader.report_warning(u'Did you forget to quote the URL? Remember that & is a meta-character in most shells, so you want to put the URL in quotes, like  youtube-dl \'http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc\' (or simply  youtube-dl BaW_jenozKc  ).')
1257
1258         # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1259         mobj = re.search(self._NEXT_URL_RE, url)
1260         if mobj:
1261             url = 'https://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/')
1262         video_id = self._extract_id(url)
1263
1264         # Get video webpage
1265         self.report_video_webpage_download(video_id)
1266         url = 'https://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
1267         request = compat_urllib_request.Request(url)
1268         try:
1269             video_webpage_bytes = compat_urllib_request.urlopen(request).read()
1270         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1271             raise ExtractorError(u'Unable to download video webpage: %s' % compat_str(err))
1272
1273         video_webpage = video_webpage_bytes.decode('utf-8', 'ignore')
1274
1275         # Attempt to extract SWF player URL
1276         mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
1277         if mobj is not None:
1278             player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1279         else:
1280             player_url = None
1281
1282         # Get video info
1283         self.report_video_info_webpage_download(video_id)
1284         if re.search(r'player-age-gate-content">', video_webpage) is not None:
1285             self.report_age_confirmation()
1286             age_gate = True
1287             # We simulate the access to the video from www.youtube.com/v/{video_id}
1288             # this can be viewed without login into Youtube
1289             data = compat_urllib_parse.urlencode({'video_id': video_id,
1290                                                   'el': 'embedded',
1291                                                   'gl': 'US',
1292                                                   'hl': 'en',
1293                                                   'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1294                                                   'asv': 3,
1295                                                   'sts':'1588',
1296                                                   })
1297             video_info_url = 'https://www.youtube.com/get_video_info?' + data
1298             video_info_webpage = self._download_webpage(video_info_url, video_id,
1299                                     note=False,
1300                                     errnote='unable to download video info webpage')
1301             video_info = compat_parse_qs(video_info_webpage)
1302         else:
1303             age_gate = False
1304             for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
1305                 video_info_url = ('https://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
1306                         % (video_id, el_type))
1307                 video_info_webpage = self._download_webpage(video_info_url, video_id,
1308                                         note=False,
1309                                         errnote='unable to download video info webpage')
1310                 video_info = compat_parse_qs(video_info_webpage)
1311                 if 'token' in video_info:
1312                     break
1313         if 'token' not in video_info:
1314             if 'reason' in video_info:
1315                 raise ExtractorError(u'YouTube said: %s' % video_info['reason'][0], expected=True)
1316             else:
1317                 raise ExtractorError(u'"token" parameter not in video info for unknown reason')
1318
1319         # Check for "rental" videos
1320         if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
1321             raise ExtractorError(u'"rental" videos not supported')
1322
1323         # Start extracting information
1324         self.report_information_extraction(video_id)
1325
1326         # uploader
1327         if 'author' not in video_info:
1328             raise ExtractorError(u'Unable to extract uploader name')
1329         video_uploader = compat_urllib_parse.unquote_plus(video_info['author'][0])
1330
1331         # uploader_id
1332         video_uploader_id = None
1333         mobj = re.search(r'<link itemprop="url" href="http://www.youtube.com/(?:user|channel)/([^"]+)">', video_webpage)
1334         if mobj is not None:
1335             video_uploader_id = mobj.group(1)
1336         else:
1337             self._downloader.report_warning(u'unable to extract uploader nickname')
1338
1339         # title
1340         if 'title' not in video_info:
1341             raise ExtractorError(u'Unable to extract video title')
1342         video_title = compat_urllib_parse.unquote_plus(video_info['title'][0])
1343
1344         # thumbnail image
1345         # We try first to get a high quality image:
1346         m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
1347                             video_webpage, re.DOTALL)
1348         if m_thumb is not None:
1349             video_thumbnail = m_thumb.group(1)
1350         elif 'thumbnail_url' not in video_info:
1351             self._downloader.report_warning(u'unable to extract video thumbnail')
1352             video_thumbnail = None
1353         else:   # don't panic if we can't find it
1354             video_thumbnail = compat_urllib_parse.unquote_plus(video_info['thumbnail_url'][0])
1355
1356         # upload date
1357         upload_date = None
1358         mobj = re.search(r'id="eow-date.*?>(.*?)</span>', video_webpage, re.DOTALL)
1359         if mobj is not None:
1360             upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
1361             upload_date = unified_strdate(upload_date)
1362
1363         # description
1364         video_description = get_element_by_id("eow-description", video_webpage)
1365         if video_description:
1366             video_description = clean_html(video_description)
1367         else:
1368             fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
1369             if fd_mobj:
1370                 video_description = unescapeHTML(fd_mobj.group(1))
1371             else:
1372                 video_description = u''
1373
1374         # subtitles
1375         video_subtitles = self.extract_subtitles(video_id, video_webpage)
1376
1377         if self._downloader.params.get('listsubtitles', False):
1378             self._list_available_subtitles(video_id, video_webpage)
1379             return
1380
1381         if 'length_seconds' not in video_info:
1382             self._downloader.report_warning(u'unable to extract video duration')
1383             video_duration = ''
1384         else:
1385             video_duration = compat_urllib_parse.unquote_plus(video_info['length_seconds'][0])
1386
1387         # Decide which formats to download
1388
1389         try:
1390             mobj = re.search(r';ytplayer.config = ({.*?});', video_webpage)
1391             if not mobj:
1392                 raise ValueError('Could not find vevo ID')
1393             info = json.loads(mobj.group(1))
1394             args = info['args']
1395             # Easy way to know if the 's' value is in url_encoded_fmt_stream_map
1396             # this signatures are encrypted
1397             m_s = re.search(r'[&,]s=', args['url_encoded_fmt_stream_map'])
1398             if m_s is not None:
1399                 self.to_screen(u'%s: Encrypted signatures detected.' % video_id)
1400                 video_info['url_encoded_fmt_stream_map'] = [args['url_encoded_fmt_stream_map']]
1401             m_s = re.search(r'[&,]s=', args.get('adaptive_fmts', u''))
1402             if m_s is not None:
1403                 if 'url_encoded_fmt_stream_map' in video_info:
1404                     video_info['url_encoded_fmt_stream_map'][0] += ',' + args['adaptive_fmts']
1405                 else:
1406                     video_info['url_encoded_fmt_stream_map'] = [args['adaptive_fmts']]
1407             elif 'adaptive_fmts' in video_info:
1408                 if 'url_encoded_fmt_stream_map' in video_info:
1409                     video_info['url_encoded_fmt_stream_map'][0] += ',' + video_info['adaptive_fmts'][0]
1410                 else:
1411                     video_info['url_encoded_fmt_stream_map'] = video_info['adaptive_fmts']
1412         except ValueError:
1413             pass
1414
1415         if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1416             self.report_rtmp_download()
1417             video_url_list = [(None, video_info['conn'][0])]
1418         elif 'url_encoded_fmt_stream_map' in video_info and len(video_info['url_encoded_fmt_stream_map']) >= 1:
1419             if 'rtmpe%3Dyes' in video_info['url_encoded_fmt_stream_map'][0]:
1420                 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
1421             url_map = {}
1422             for url_data_str in video_info['url_encoded_fmt_stream_map'][0].split(','):
1423                 url_data = compat_parse_qs(url_data_str)
1424                 if 'itag' in url_data and 'url' in url_data:
1425                     url = url_data['url'][0]
1426                     if 'sig' in url_data:
1427                         url += '&signature=' + url_data['sig'][0]
1428                     elif 's' in url_data:
1429                         encrypted_sig = url_data['s'][0]
1430                         if self._downloader.params.get('verbose'):
1431                             if age_gate:
1432                                 if player_url is None:
1433                                     player_version = 'unknown'
1434                                 else:
1435                                     player_version = self._search_regex(
1436                                         r'-(.+)\.swf$', player_url,
1437                                         u'flash player', fatal=False)
1438                                 player_desc = 'flash player %s' % player_version
1439                             else:
1440                                 player_version = self._search_regex(
1441                                     r'html5player-(.+?)\.js', video_webpage,
1442                                     'html5 player', fatal=False)
1443                                 player_desc = u'html5 player %s' % player_version
1444
1445                             parts_sizes = u'.'.join(compat_str(len(part)) for part in encrypted_sig.split('.'))
1446                             self.to_screen(u'encrypted signature length %d (%s), itag %s, %s' %
1447                                 (len(encrypted_sig), parts_sizes, url_data['itag'][0], player_desc))
1448
1449                         if not age_gate:
1450                             jsplayer_url_json = self._search_regex(
1451                                 r'"assets":.+?"js":\s*("[^"]+")',
1452                                 video_webpage, u'JS player URL')
1453                             player_url = json.loads(jsplayer_url_json)
1454
1455                         signature = self._decrypt_signature(
1456                             encrypted_sig, video_id, player_url, age_gate)
1457                         url += '&signature=' + signature
1458                     if 'ratebypass' not in url:
1459                         url += '&ratebypass=yes'
1460                     url_map[url_data['itag'][0]] = url
1461             video_url_list = self._get_video_url_list(url_map)
1462             if not video_url_list:
1463                 return
1464         elif video_info.get('hlsvp'):
1465             manifest_url = video_info['hlsvp'][0]
1466             url_map = self._extract_from_m3u8(manifest_url, video_id)
1467             video_url_list = self._get_video_url_list(url_map)
1468             if not video_url_list:
1469                 return
1470
1471         else:
1472             raise ExtractorError(u'no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
1473
1474         results = []
1475         for format_param, video_real_url in video_url_list:
1476             # Extension
1477             video_extension = self._video_extensions.get(format_param, 'flv')
1478
1479             video_format = '{0} - {1}{2}'.format(format_param if format_param else video_extension,
1480                                               self._video_dimensions.get(format_param, '???'),
1481                                               ' ('+self._special_itags[format_param]+')' if format_param in self._special_itags else '')
1482
1483             results.append({
1484                 'id':       video_id,
1485                 'url':      video_real_url,
1486                 'uploader': video_uploader,
1487                 'uploader_id': video_uploader_id,
1488                 'upload_date':  upload_date,
1489                 'title':    video_title,
1490                 'ext':      video_extension,
1491                 'format':   video_format,
1492                 'thumbnail':    video_thumbnail,
1493                 'description':  video_description,
1494                 'player_url':   player_url,
1495                 'subtitles':    video_subtitles,
1496                 'duration':     video_duration
1497             })
1498         return results
1499
1500 class YoutubePlaylistIE(InfoExtractor):
1501     IE_DESC = u'YouTube.com playlists'
1502     _VALID_URL = r"""(?:
1503                         (?:https?://)?
1504                         (?:\w+\.)?
1505                         youtube\.com/
1506                         (?:
1507                            (?:course|view_play_list|my_playlists|artist|playlist|watch)
1508                            \? (?:.*?&)*? (?:p|a|list)=
1509                         |  p/
1510                         )
1511                         ((?:PL|EC|UU|FL)?[0-9A-Za-z-_]{10,})
1512                         .*
1513                      |
1514                         ((?:PL|EC|UU|FL)[0-9A-Za-z-_]{10,})
1515                      )"""
1516     _TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/playlists/%s?max-results=%i&start-index=%i&v=2&alt=json&safeSearch=none'
1517     _MAX_RESULTS = 50
1518     IE_NAME = u'youtube:playlist'
1519
1520     @classmethod
1521     def suitable(cls, url):
1522         """Receives a URL and returns True if suitable for this IE."""
1523         return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
1524
1525     def _real_extract(self, url):
1526         # Extract playlist id
1527         mobj = re.match(self._VALID_URL, url, re.VERBOSE)
1528         if mobj is None:
1529             raise ExtractorError(u'Invalid URL: %s' % url)
1530
1531         # Download playlist videos from API
1532         playlist_id = mobj.group(1) or mobj.group(2)
1533         videos = []
1534
1535         for page_num in itertools.count(1):
1536             start_index = self._MAX_RESULTS * (page_num - 1) + 1
1537             if start_index >= 1000:
1538                 self._downloader.report_warning(u'Max number of results reached')
1539                 break
1540             url = self._TEMPLATE_URL % (playlist_id, self._MAX_RESULTS, start_index)
1541             page = self._download_webpage(url, playlist_id, u'Downloading page #%s' % page_num)
1542
1543             try:
1544                 response = json.loads(page)
1545             except ValueError as err:
1546                 raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))
1547
1548             if 'feed' not in response:
1549                 raise ExtractorError(u'Got a malformed response from YouTube API')
1550             playlist_title = response['feed']['title']['$t']
1551             if 'entry' not in response['feed']:
1552                 # Number of videos is a multiple of self._MAX_RESULTS
1553                 break
1554
1555             for entry in response['feed']['entry']:
1556                 index = entry['yt$position']['$t']
1557                 if 'media$group' in entry and 'yt$videoid' in entry['media$group']:
1558                     videos.append((
1559                         index,
1560                         'https://www.youtube.com/watch?v=' + entry['media$group']['yt$videoid']['$t']
1561                     ))
1562
1563         videos = [v[1] for v in sorted(videos)]
1564
1565         url_results = [self.url_result(vurl, 'Youtube') for vurl in videos]
1566         return [self.playlist_result(url_results, playlist_id, playlist_title)]
1567
1568
1569 class YoutubeChannelIE(InfoExtractor):
1570     IE_DESC = u'YouTube.com channels'
1571     _VALID_URL = r"^(?:https?://)?(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/([0-9A-Za-z_-]+)"
1572     _TEMPLATE_URL = 'http://www.youtube.com/channel/%s/videos?sort=da&flow=list&view=0&page=%s&gl=US&hl=en'
1573     _MORE_PAGES_INDICATOR = 'yt-uix-load-more'
1574     _MORE_PAGES_URL = 'http://www.youtube.com/c4_browse_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s'
1575     IE_NAME = u'youtube:channel'
1576
1577     def extract_videos_from_page(self, page):
1578         ids_in_page = []
1579         for mobj in re.finditer(r'href="/watch\?v=([0-9A-Za-z_-]+)&?', page):
1580             if mobj.group(1) not in ids_in_page:
1581                 ids_in_page.append(mobj.group(1))
1582         return ids_in_page
1583
1584     def _real_extract(self, url):
1585         # Extract channel id
1586         mobj = re.match(self._VALID_URL, url)
1587         if mobj is None:
1588             raise ExtractorError(u'Invalid URL: %s' % url)
1589
1590         # Download channel page
1591         channel_id = mobj.group(1)
1592         video_ids = []
1593         pagenum = 1
1594
1595         url = self._TEMPLATE_URL % (channel_id, pagenum)
1596         page = self._download_webpage(url, channel_id,
1597                                       u'Downloading page #%s' % pagenum)
1598
1599         # Extract video identifiers
1600         ids_in_page = self.extract_videos_from_page(page)
1601         video_ids.extend(ids_in_page)
1602
1603         # Download any subsequent channel pages using the json-based channel_ajax query
1604         if self._MORE_PAGES_INDICATOR in page:
1605             for pagenum in itertools.count(1):
1606                 url = self._MORE_PAGES_URL % (pagenum, channel_id)
1607                 page = self._download_webpage(url, channel_id,
1608                                               u'Downloading page #%s' % pagenum)
1609
1610                 page = json.loads(page)
1611
1612                 ids_in_page = self.extract_videos_from_page(page['content_html'])
1613                 video_ids.extend(ids_in_page)
1614
1615                 if self._MORE_PAGES_INDICATOR  not in page['load_more_widget_html']:
1616                     break
1617
1618         self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))
1619
1620         urls = ['http://www.youtube.com/watch?v=%s' % id for id in video_ids]
1621         url_entries = [self.url_result(eurl, 'Youtube') for eurl in urls]
1622         return [self.playlist_result(url_entries, channel_id)]
1623
1624
1625 class YoutubeUserIE(InfoExtractor):
1626     IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)'
1627     _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?)|ytuser:)(?!feed/)([A-Za-z0-9_-]+)'
1628     _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
1629     _GDATA_PAGE_SIZE = 50
1630     _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'
1631     IE_NAME = u'youtube:user'
1632
1633     @classmethod
1634     def suitable(cls, url):
1635         # Don't return True if the url can be extracted with other youtube
1636         # extractor, the regex would is too permissive and it would match.
1637         other_ies = iter(klass for (name, klass) in globals().items() if name.endswith('IE') and klass is not cls)
1638         if any(ie.suitable(url) for ie in other_ies): return False
1639         else: return super(YoutubeUserIE, cls).suitable(url)
1640
1641     def _real_extract(self, url):
1642         # Extract username
1643         mobj = re.match(self._VALID_URL, url)
1644         if mobj is None:
1645             raise ExtractorError(u'Invalid URL: %s' % url)
1646
1647         username = mobj.group(1)
1648
1649         # Download video ids using YouTube Data API. Result size per
1650         # query is limited (currently to 50 videos) so we need to query
1651         # page by page until there are no video ids - it means we got
1652         # all of them.
1653
1654         video_ids = []
1655
1656         for pagenum in itertools.count(0):
1657             start_index = pagenum * self._GDATA_PAGE_SIZE + 1
1658
1659             gdata_url = self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index)
1660             page = self._download_webpage(gdata_url, username,
1661                                           u'Downloading video ids from %d to %d' % (start_index, start_index + self._GDATA_PAGE_SIZE))
1662
1663             try:
1664                 response = json.loads(page)
1665             except ValueError as err:
1666                 raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))
1667             if 'entry' not in response['feed']:
1668                 # Number of videos is a multiple of self._MAX_RESULTS
1669                 break
1670
1671             # Extract video identifiers
1672             ids_in_page = []
1673             for entry in response['feed']['entry']:
1674                 ids_in_page.append(entry['id']['$t'].split('/')[-1])
1675             video_ids.extend(ids_in_page)
1676
1677             # A little optimization - if current page is not
1678             # "full", ie. does not contain PAGE_SIZE video ids then
1679             # we can assume that this page is the last one - there
1680             # are no more ids on further pages - no need to query
1681             # again.
1682
1683             if len(ids_in_page) < self._GDATA_PAGE_SIZE:
1684                 break
1685
1686         urls = ['http://www.youtube.com/watch?v=%s' % video_id for video_id in video_ids]
1687         url_results = [self.url_result(rurl, 'Youtube') for rurl in urls]
1688         return [self.playlist_result(url_results, playlist_title = username)]
1689
1690 class YoutubeSearchIE(SearchInfoExtractor):
1691     IE_DESC = u'YouTube.com searches'
1692     _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc'
1693     _MAX_RESULTS = 1000
1694     IE_NAME = u'youtube:search'
1695     _SEARCH_KEY = 'ytsearch'
1696
1697     def report_download_page(self, query, pagenum):
1698         """Report attempt to download search page with given number."""
1699         self._downloader.to_screen(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))
1700
1701     def _get_n_results(self, query, n):
1702         """Get a specified number of results for a query"""
1703
1704         video_ids = []
1705         pagenum = 0
1706         limit = n
1707
1708         while (50 * pagenum) < limit:
1709             self.report_download_page(query, pagenum+1)
1710             result_url = self._API_URL % (compat_urllib_parse.quote_plus(query), (50*pagenum)+1)
1711             request = compat_urllib_request.Request(result_url)
1712             try:
1713                 data = compat_urllib_request.urlopen(request).read().decode('utf-8')
1714             except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1715                 raise ExtractorError(u'Unable to download API page: %s' % compat_str(err))
1716             api_response = json.loads(data)['data']
1717
1718             if not 'items' in api_response:
1719                 raise ExtractorError(u'[youtube] No video results')
1720
1721             new_ids = list(video['id'] for video in api_response['items'])
1722             video_ids += new_ids
1723
1724             limit = min(n, api_response['totalItems'])
1725             pagenum += 1
1726
1727         if len(video_ids) > n:
1728             video_ids = video_ids[:n]
1729         videos = [self.url_result('http://www.youtube.com/watch?v=%s' % id, 'Youtube') for id in video_ids]
1730         return self.playlist_result(videos, query)
1731
1732
1733 class YoutubeShowIE(InfoExtractor):
1734     IE_DESC = u'YouTube.com (multi-season) shows'
1735     _VALID_URL = r'https?://www\.youtube\.com/show/(.*)'
1736     IE_NAME = u'youtube:show'
1737
1738     def _real_extract(self, url):
1739         mobj = re.match(self._VALID_URL, url)
1740         show_name = mobj.group(1)
1741         webpage = self._download_webpage(url, show_name, u'Downloading show webpage')
1742         # There's one playlist for each season of the show
1743         m_seasons = list(re.finditer(r'href="(/playlist\?list=.*?)"', webpage))
1744         self.to_screen(u'%s: Found %s seasons' % (show_name, len(m_seasons)))
1745         return [self.url_result('https://www.youtube.com' + season.group(1), 'YoutubePlaylist') for season in m_seasons]
1746
1747
1748 class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
1749     """
1750     Base class for extractors that fetch info from
1751     http://www.youtube.com/feed_ajax
1752     Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
1753     """
1754     _LOGIN_REQUIRED = True
1755     _PAGING_STEP = 30
1756     # use action_load_personal_feed instead of action_load_system_feed
1757     _PERSONAL_FEED = False
1758
1759     @property
1760     def _FEED_TEMPLATE(self):
1761         action = 'action_load_system_feed'
1762         if self._PERSONAL_FEED:
1763             action = 'action_load_personal_feed'
1764         return 'http://www.youtube.com/feed_ajax?%s=1&feed_name=%s&paging=%%s' % (action, self._FEED_NAME)
1765
1766     @property
1767     def IE_NAME(self):
1768         return u'youtube:%s' % self._FEED_NAME
1769
1770     def _real_initialize(self):
1771         self._login()
1772
1773     def _real_extract(self, url):
1774         feed_entries = []
1775         # The step argument is available only in 2.7 or higher
1776         for i in itertools.count(0):
1777             paging = i*self._PAGING_STEP
1778             info = self._download_webpage(self._FEED_TEMPLATE % paging,
1779                                           u'%s feed' % self._FEED_NAME,
1780                                           u'Downloading page %s' % i)
1781             info = json.loads(info)
1782             feed_html = info['feed_html']
1783             m_ids = re.finditer(r'"/watch\?v=(.*?)["&]', feed_html)
1784             ids = orderedSet(m.group(1) for m in m_ids)
1785             feed_entries.extend(self.url_result(id, 'Youtube') for id in ids)
1786             if info['paging'] is None:
1787                 break
1788         return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE)
1789
1790 class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
1791     IE_DESC = u'YouTube.com subscriptions feed, "ytsubs" keyword(requires authentication)'
1792     _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
1793     _FEED_NAME = 'subscriptions'
1794     _PLAYLIST_TITLE = u'Youtube Subscriptions'
1795
1796 class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
1797     IE_DESC = u'YouTube.com recommended videos, "ytrec" keyword (requires authentication)'
1798     _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?'
1799     _FEED_NAME = 'recommended'
1800     _PLAYLIST_TITLE = u'Youtube Recommended videos'
1801
1802 class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor):
1803     IE_DESC = u'Youtube watch later list, "ytwatchlater" keyword (requires authentication)'
1804     _VALID_URL = r'https?://www\.youtube\.com/feed/watch_later|:ytwatchlater'
1805     _FEED_NAME = 'watch_later'
1806     _PLAYLIST_TITLE = u'Youtube Watch Later'
1807     _PAGING_STEP = 100
1808     _PERSONAL_FEED = True
1809
1810 class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
1811     IE_NAME = u'youtube:favorites'
1812     IE_DESC = u'YouTube.com favourite videos, "ytfav" keyword (requires authentication)'
1813     _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
1814     _LOGIN_REQUIRED = True
1815
1816     def _real_extract(self, url):
1817         webpage = self._download_webpage('https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
1818         playlist_id = self._search_regex(r'list=(.+?)["&]', webpage, u'favourites playlist id')
1819         return self.url_result(playlist_id, 'YoutubePlaylist')