Properly handle stream map not being present
[youtube-dl] / youtube_dl / extractor / youtube.py
1 # coding: utf-8
2
3 import collections
4 import errno
5 import io
6 import itertools
7 import json
8 import os.path
9 import re
10 import socket
11 import string
12 import struct
13 import traceback
14 import xml.etree.ElementTree
15 import zlib
16
17 from .common import InfoExtractor, SearchInfoExtractor
18 from .subtitles import SubtitlesInfoExtractor
19 from ..utils import (
20     compat_chr,
21     compat_http_client,
22     compat_parse_qs,
23     compat_urllib_error,
24     compat_urllib_parse,
25     compat_urllib_request,
26     compat_str,
27
28     clean_html,
29     get_element_by_id,
30     ExtractorError,
31     unescapeHTML,
32     unified_strdate,
33     orderedSet,
34     write_json_file,
35 )
36
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _LANG_URL = r'https://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
    _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    def report_lang(self):
        """Report attempt to set language."""
        self.to_screen(u'Setting language')

    def _set_language(self):
        """Request the language page so following pages are served in English.

        Returns True on success, False (after a warning) on network failure.
        """
        request = compat_urllib_request.Request(self._LANG_URL)
        try:
            self.report_lang()
            compat_urllib_request.urlopen(request).read()
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self._downloader.report_warning(u'unable to set language: %s' % compat_str(err))
            return False
        return True

    def _login(self):
        """Log in to YouTube with the configured credentials.

        Returns True on success, False when no credentials are available or
        the attempt failed.  Raises ExtractorError when _LOGIN_REQUIRED is
        set and no login info was provided.
        """
        (username, password) = self._get_login_info()
        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED:
                raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            return False

        request = compat_urllib_request.Request(self._LOGIN_URL)
        try:
            login_page = compat_urllib_request.urlopen(request).read().decode('utf-8')
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self._downloader.report_warning(u'unable to fetch login page: %s' % compat_str(err))
            return False

        # Anti-forgery tokens embedded in the login form; absent tokens stay None.
        galx = None
        dsh = None
        match = re.search(re.compile(r'<input.+?name="GALX".+?value="(.+?)"', re.DOTALL), login_page)
        if match:
            galx = match.group(1)
        match = re.search(re.compile(r'<input.+?name="dsh".+?value="(.+?)"', re.DOTALL), login_page)
        if match:
            dsh = match.group(1)

        # Log in
        login_form_strs = {
                u'continue': u'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
                u'Email': username,
                u'GALX': galx,
                u'Passwd': password,
                u'PersistentCookie': u'yes',
                u'_utf8': u'霱',
                u'bgresponse': u'js_disabled',
                u'checkConnection': u'',
                u'checkedDomains': u'youtube',
                u'dnConn': u'',
                u'dsh': dsh,
                u'pstMsg': u'0',
                u'rmShown': u'1',
                u'secTok': u'',
                u'signIn': u'Sign in',
                u'timeStmp': u'',
                u'service': u'youtube',
                u'uilel': u'3',
                u'hl': u'en_US',
        }
        # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
        # chokes on unicode
        login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k,v in login_form_strs.items())
        login_data = compat_urllib_parse.urlencode(login_form).encode('ascii')
        request = compat_urllib_request.Request(self._LOGIN_URL, login_data)
        try:
            self.report_login()
            login_results = compat_urllib_request.urlopen(request).read().decode('utf-8')
            if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
                self._downloader.report_warning(u'unable to log in: bad username or password')
                return False
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self._downloader.report_warning(u'unable to log in: %s' % compat_str(err))
            return False
        return True

    def _confirm_age(self):
        """Submit the age-confirmation form.

        Raises ExtractorError when the confirmation request fails.
        """
        age_form = {
                'next_url':     '/',
                'action_confirm':   'Confirm',
                }
        # urlencode() returns str; POST data must be bytes on Python 3,
        # matching the .encode('ascii') already done for the login form.
        request = compat_urllib_request.Request(self._AGE_URL, compat_urllib_parse.urlencode(age_form).encode('ascii'))
        try:
            self.report_age_confirmation()
            compat_urllib_request.urlopen(request).read().decode('utf-8')
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            raise ExtractorError(u'Unable to confirm age: %s' % compat_str(err))
        return True

    def _real_initialize(self):
        """Set language, log in (if possible) and confirm age before extraction."""
        if self._downloader is None:
            return
        if not self._set_language():
            return
        if not self._login():
            return
        self._confirm_age()
143
144
145 class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
    IE_DESC = u'YouTube.com'
    # Verbose regex matching every accepted video URL form; group 1 is the
    # optional URL prefix, group 2 the 11-character video ID.
    _VALID_URL = r"""^
                     (
                         (?:https?://)?                                       # http(s):// (optional)
                         (?:(?:(?:(?:\w+\.)?youtube(?:-nocookie)?\.com/|
                            tube\.majestyc\.net/|
                            youtube\.googleapis\.com/)                        # the various hostnames, with wildcard subdomains
                         (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
                         (?:                                                  # the various things that can precede the ID:
                             (?:(?:v|embed|e)/)                               # v/ or embed/ or e/
                             |(?:                                             # or the v= param in all its forms
                                 (?:(?:watch|movie)(?:_popup)?(?:\.php)?)?    # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
                                 (?:\?|\#!?)                                  # the params delimiter ? or # or #!
                                 (?:.*?&)?                                    # any other preceding param (like /?s=tuff&v=xxxx)
                                 v=
                             )
                         ))
                         |youtu\.be/                                          # just youtu.be/xxxx
                         )
                     )?                                                       # all until now is optional -> you can pass the naked ID
                     ([0-9A-Za-z_-]{11})                                      # here is it! the YouTube video ID
                     (?(1).+)?                                                # if we found the ID, everything can follow
                     $"""
    # Extracts the next_url query parameter from age-gate redirect URLs.
    _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
    # Listed in order of quality
    _available_formats = ['38', '37', '46', '22', '45', '35', '44', '34', '18', '43', '6', '5', '36', '17', '13',
                          # Apple HTTP Live Streaming
                          '96', '95', '94', '93', '92', '132', '151',
                          # 3D
                          '85', '84', '102', '83', '101', '82', '100',
                          # Dash video
                          '138', '137', '248', '136', '247', '135', '246',
                          '245', '244', '134', '243', '133', '242', '160',
                          # Dash audio
                          '141', '172', '140', '171', '139',
                          ]
    # Same itags, but ranking free (WebM) formats above MP4 at equal quality.
    _available_formats_prefer_free = ['38', '46', '37', '45', '22', '44', '35', '43', '34', '18', '6', '5', '36', '17', '13',
                                      # Apple HTTP Live Streaming
                                      '96', '95', '94', '93', '92', '132', '151',
                                      # 3D
                                      '85', '102', '84', '101', '83', '100', '82',
                                      # Dash video
                                      '138', '248', '137', '247', '136', '246', '245',
                                      '244', '135', '243', '134', '242', '133', '160',
                                      # Dash audio
                                      '172', '141', '171', '140', '139',
                                      ]
    # Container extension -> itags available in that container (best first).
    _video_formats_map = {
        'flv': ['35', '34', '6', '5'],
        '3gp': ['36', '17', '13'],
        'mp4': ['38', '37', '22', '18'],
        'webm': ['46', '45', '44', '43'],
    }
    # itag -> container file extension.
    _video_extensions = {
        '13': '3gp',
        '17': '3gp',
        '18': 'mp4',
        '22': 'mp4',
        '36': '3gp',
        '37': 'mp4',
        '38': 'mp4',
        '43': 'webm',
        '44': 'webm',
        '45': 'webm',
        '46': 'webm',

        # 3d videos
        '82': 'mp4',
        '83': 'mp4',
        '84': 'mp4',
        '85': 'mp4',
        '100': 'webm',
        '101': 'webm',
        '102': 'webm',

        # Apple HTTP Live Streaming
        '92': 'mp4',
        '93': 'mp4',
        '94': 'mp4',
        '95': 'mp4',
        '96': 'mp4',
        '132': 'mp4',
        '151': 'mp4',

        # Dash mp4
        '133': 'mp4',
        '134': 'mp4',
        '135': 'mp4',
        '136': 'mp4',
        '137': 'mp4',
        '138': 'mp4',
        '139': 'mp4',
        '140': 'mp4',
        '141': 'mp4',
        '160': 'mp4',

        # Dash webm
        '171': 'webm',
        '172': 'webm',
        '242': 'webm',
        '243': 'webm',
        '244': 'webm',
        '245': 'webm',
        '246': 'webm',
        '247': 'webm',
        '248': 'webm',
    }
    # itag -> human-readable size: "HxW" dimensions, a quality label such as
    # '720p', or an audio bitrate such as '128k' for audio-only itags.
    _video_dimensions = {
        '5': '240x400',
        '6': '???',
        '13': '???',
        '17': '144x176',
        '18': '360x640',
        '22': '720x1280',
        '34': '360x640',
        '35': '480x854',
        '36': '240x320',
        '37': '1080x1920',
        '38': '3072x4096',
        '43': '360x640',
        '44': '480x854',
        '45': '720x1280',
        '46': '1080x1920',
        '82': '360p',
        '83': '480p',
        '84': '720p',
        '85': '1080p',
        '92': '240p',
        '93': '360p',
        '94': '480p',
        '95': '720p',
        '96': '1080p',
        '100': '360p',
        '101': '480p',
        '102': '720p',
        '132': '240p',
        '151': '72p',
        '133': '240p',
        '134': '360p',
        '135': '480p',
        '136': '720p',
        '137': '1080p',
        '138': '>1080p',
        '139': '48k',
        '140': '128k',
        '141': '256k',
        '160': '192p',
        '171': '128k',
        '172': '256k',
        '242': '240p',
        '243': '360p',
        '244': '480p',
        '245': '480p',
        '246': '480p',
        '247': '720p',
        '248': '1080p',
    }
    # itag -> note for formats that need special handling (3D or DASH streams).
    _special_itags = {
        '82': '3D',
        '83': '3D',
        '84': '3D',
        '85': '3D',
        '100': '3D',
        '101': '3D',
        '102': '3D',
        '133': 'DASH Video',
        '134': 'DASH Video',
        '135': 'DASH Video',
        '136': 'DASH Video',
        '137': 'DASH Video',
        '138': 'DASH Video',
        '139': 'DASH Audio',
        '140': 'DASH Audio',
        '141': 'DASH Audio',
        '160': 'DASH Video',
        '171': 'DASH Audio',
        '172': 'DASH Audio',
        '242': 'DASH Video',
        '243': 'DASH Video',
        '244': 'DASH Video',
        '245': 'DASH Video',
        '246': 'DASH Video',
        '247': 'DASH Video',
        '248': 'DASH Video',
    }
331
    IE_NAME = u'youtube'
    # Integration-test fixtures consumed by the project's test harness.
    _TESTS = [
        {
            u"url":  u"http://www.youtube.com/watch?v=BaW_jenozKc",
            u"file":  u"BaW_jenozKc.mp4",
            u"info_dict": {
                u"title": u"youtube-dl test video \"'/\\ä↭𝕐",
                u"uploader": u"Philipp Hagemeister",
                u"uploader_id": u"phihag",
                u"upload_date": u"20121002",
                u"description": u"test chars:  \"'/\\ä↭𝕐\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de ."
            }
        },
        {
            u"url":  u"http://www.youtube.com/watch?v=1ltcDfZMA3U",
            u"file":  u"1ltcDfZMA3U.flv",
            u"note": u"Test VEVO video (#897)",
            u"info_dict": {
                u"upload_date": u"20070518",
                u"title": u"Maps - It Will Find You",
                u"description": u"Music video by Maps performing It Will Find You.",
                u"uploader": u"MuteUSA",
                u"uploader_id": u"MuteUSA"
            }
        },
        {
            u"url":  u"http://www.youtube.com/watch?v=UxxajLWwzqY",
            u"file":  u"UxxajLWwzqY.mp4",
            u"note": u"Test generic use_cipher_signature video (#897)",
            u"info_dict": {
                u"upload_date": u"20120506",
                u"title": u"Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]",
                u"description": u"md5:5b292926389560516e384ac437c0ec07",
                u"uploader": u"Icona Pop",
                u"uploader_id": u"IconaPop"
            }
        },
        {
            u"url":  u"https://www.youtube.com/watch?v=07FYdnEawAQ",
            u"file":  u"07FYdnEawAQ.mp4",
            u"note": u"Test VEVO video with age protection (#956)",
            u"info_dict": {
                u"upload_date": u"20130703",
                u"title": u"Justin Timberlake - Tunnel Vision (Explicit)",
                u"description": u"md5:64249768eec3bc4276236606ea996373",
                u"uploader": u"justintimberlakeVEVO",
                u"uploader_id": u"justintimberlakeVEVO"
            }
        },
    ]
382
383
384     @classmethod
385     def suitable(cls, url):
386         """Receives a URL and returns True if suitable for this IE."""
387         if YoutubePlaylistIE.suitable(url): return False
388         return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
389
390     def __init__(self, *args, **kwargs):
391         super(YoutubeIE, self).__init__(*args, **kwargs)
392         self._player_cache = {}
393
394     def report_video_webpage_download(self, video_id):
395         """Report attempt to download video webpage."""
396         self.to_screen(u'%s: Downloading video webpage' % video_id)
397
398     def report_video_info_webpage_download(self, video_id):
399         """Report attempt to download video info webpage."""
400         self.to_screen(u'%s: Downloading video info webpage' % video_id)
401
402     def report_information_extraction(self, video_id):
403         """Report attempt to extract video information."""
404         self.to_screen(u'%s: Extracting video information' % video_id)
405
406     def report_unavailable_format(self, video_id, format):
407         """Report extracted video URL."""
408         self.to_screen(u'%s: Format %s not available' % (video_id, format))
409
410     def report_rtmp_download(self):
411         """Indicate the download will use the RTMP protocol."""
412         self.to_screen(u'RTMP download detected')
413
    def _extract_signature_function(self, video_id, player_url, slen):
        """Return a function decrypting signatures of length *slen*.

        The function is extracted from the JS or SWF player at *player_url*.
        When a cache directory is configured, the result is memoized on disk
        as the list of source indices producing the decrypted signature.
        """
        id_m = re.match(r'.*-(?P<id>[a-zA-Z0-9_-]+)\.(?P<ext>[a-z]+)$',
                        player_url)
        player_type = id_m.group('ext')
        player_id = id_m.group('id')

        # Read from filesystem cache
        func_id = '%s_%s_%d' % (player_type, player_id, slen)
        assert os.path.basename(func_id) == func_id
        cache_dir = self._downloader.params.get('cachedir',
                                                u'~/.youtube-dl/cache')

        # cachedir may be explicitly set to None to disable caching entirely.
        cache_enabled = cache_dir is not None
        if cache_enabled:
            cache_fn = os.path.join(os.path.expanduser(cache_dir),
                                    u'youtube-sigfuncs',
                                    func_id + '.json')
            try:
                with io.open(cache_fn, 'r', encoding='utf-8') as cachef:
                    cache_spec = json.load(cachef)
                # The cached spec is the permutation of input indices.
                return lambda s: u''.join(s[i] for i in cache_spec)
            except IOError:
                pass  # No cache available

        if player_type == 'js':
            code = self._download_webpage(
                player_url, video_id,
                note=u'Downloading %s player %s' % (player_type, player_id),
                errnote=u'Download of %s failed' % player_url)
            res = self._parse_sig_js(code)
        elif player_type == 'swf':
            urlh = self._request_webpage(
                player_url, video_id,
                note=u'Downloading %s player %s' % (player_type, player_id),
                errnote=u'Download of %s failed' % player_url)
            code = urlh.read()
            res = self._parse_sig_swf(code)
        else:
            assert False, 'Invalid player type %r' % player_type

        if cache_enabled:
            try:
                # Probe the function with a known string to record the
                # resulting index permutation, then persist it.
                test_string = u''.join(map(compat_chr, range(slen)))
                cache_res = res(test_string)
                cache_spec = [ord(c) for c in cache_res]
                try:
                    os.makedirs(os.path.dirname(cache_fn))
                except OSError as ose:
                    if ose.errno != errno.EEXIST:
                        raise
                write_json_file(cache_spec, cache_fn)
            except Exception:
                # Cache writes are best-effort; warn but never fail extraction.
                tb = traceback.format_exc()
                self._downloader.report_warning(
                    u'Writing cache to %r failed: %s' % (cache_fn, tb))

        return res
471
    def _print_sig_code(self, func, slen):
        """Print Python source equivalent to the extracted signature function.

        Probes *func* with a string of length *slen* and reconstructs the
        resulting index permutation as compact slice expressions (debug aid).
        """
        def gen_sig_code(idxs):
            def _genslice(start, end, step):
                starts = u'' if start == 0 else str(start)
                ends = (u':%d' % (end+step)) if end + step >= 0 else u':'
                steps = u'' if step == 1 else (u':%d' % step)
                return u's[%s%s%s]' % (starts, ends, steps)

            step = None
            start = '(Never used)'  # Quelch pyflakes warnings - start will be
                                    # set as soon as step is set
            for i, prev in zip(idxs[1:], idxs[:-1]):
                if step is not None:
                    # Extend or flush the current run of consecutive indices.
                    if i - prev == step:
                        continue
                    yield _genslice(start, prev, step)
                    step = None
                    continue
                if i - prev in [-1, 1]:
                    step = i - prev
                    start = prev
                    continue
                else:
                    yield u's[%d]' % prev
            # NOTE(review): relies on `i` keeping its last loop value; assumes
            # idxs has at least two elements — confirm for degenerate specs.
            if step is None:
                yield u's[%d]' % i
            else:
                yield _genslice(start, i, step)

        test_string = u''.join(map(compat_chr, range(slen)))
        cache_res = func(test_string)
        cache_spec = [ord(c) for c in cache_res]
        expr_code = u' + '.join(gen_sig_code(cache_spec))
        code = u'if len(s) == %d:\n    return %s\n' % (slen, expr_code)
        self.to_screen(u'Extracted signature function:\n' + code)
507
508     def _parse_sig_js(self, jscode):
509         funcname = self._search_regex(
510             r'signature=([a-zA-Z]+)', jscode,
511             u'Initial JS player signature function name')
512
513         functions = {}
514
515         def argidx(varname):
516             return string.lowercase.index(varname)
517
518         def interpret_statement(stmt, local_vars, allow_recursion=20):
519             if allow_recursion < 0:
520                 raise ExtractorError(u'Recursion limit reached')
521
522             if stmt.startswith(u'var '):
523                 stmt = stmt[len(u'var '):]
524             ass_m = re.match(r'^(?P<out>[a-z]+)(?:\[(?P<index>[^\]]+)\])?' +
525                              r'=(?P<expr>.*)$', stmt)
526             if ass_m:
527                 if ass_m.groupdict().get('index'):
528                     def assign(val):
529                         lvar = local_vars[ass_m.group('out')]
530                         idx = interpret_expression(ass_m.group('index'),
531                                                    local_vars, allow_recursion)
532                         assert isinstance(idx, int)
533                         lvar[idx] = val
534                         return val
535                     expr = ass_m.group('expr')
536                 else:
537                     def assign(val):
538                         local_vars[ass_m.group('out')] = val
539                         return val
540                     expr = ass_m.group('expr')
541             elif stmt.startswith(u'return '):
542                 assign = lambda v: v
543                 expr = stmt[len(u'return '):]
544             else:
545                 raise ExtractorError(
546                     u'Cannot determine left side of statement in %r' % stmt)
547
548             v = interpret_expression(expr, local_vars, allow_recursion)
549             return assign(v)
550
551         def interpret_expression(expr, local_vars, allow_recursion):
552             if expr.isdigit():
553                 return int(expr)
554
555             if expr.isalpha():
556                 return local_vars[expr]
557
558             m = re.match(r'^(?P<in>[a-z]+)\.(?P<member>.*)$', expr)
559             if m:
560                 member = m.group('member')
561                 val = local_vars[m.group('in')]
562                 if member == 'split("")':
563                     return list(val)
564                 if member == 'join("")':
565                     return u''.join(val)
566                 if member == 'length':
567                     return len(val)
568                 if member == 'reverse()':
569                     return val[::-1]
570                 slice_m = re.match(r'slice\((?P<idx>.*)\)', member)
571                 if slice_m:
572                     idx = interpret_expression(
573                         slice_m.group('idx'), local_vars, allow_recursion-1)
574                     return val[idx:]
575
576             m = re.match(
577                 r'^(?P<in>[a-z]+)\[(?P<idx>.+)\]$', expr)
578             if m:
579                 val = local_vars[m.group('in')]
580                 idx = interpret_expression(m.group('idx'), local_vars,
581                                            allow_recursion-1)
582                 return val[idx]
583
584             m = re.match(r'^(?P<a>.+?)(?P<op>[%])(?P<b>.+?)$', expr)
585             if m:
586                 a = interpret_expression(m.group('a'),
587                                          local_vars, allow_recursion)
588                 b = interpret_expression(m.group('b'),
589                                          local_vars, allow_recursion)
590                 return a % b
591
592             m = re.match(
593                 r'^(?P<func>[a-zA-Z]+)\((?P<args>[a-z0-9,]+)\)$', expr)
594             if m:
595                 fname = m.group('func')
596                 if fname not in functions:
597                     functions[fname] = extract_function(fname)
598                 argvals = [int(v) if v.isdigit() else local_vars[v]
599                            for v in m.group('args').split(',')]
600                 return functions[fname](argvals)
601             raise ExtractorError(u'Unsupported JS expression %r' % expr)
602
603         def extract_function(funcname):
604             func_m = re.search(
605                 r'function ' + re.escape(funcname) +
606                 r'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}',
607                 jscode)
608             argnames = func_m.group('args').split(',')
609
610             def resf(args):
611                 local_vars = dict(zip(argnames, args))
612                 for stmt in func_m.group('code').split(';'):
613                     res = interpret_statement(stmt, local_vars)
614                 return res
615             return resf
616
617         initial_function = extract_function(funcname)
618         return lambda s: initial_function([s])
619
620     def _parse_sig_swf(self, file_contents):
621         if file_contents[1:3] != b'WS':
622             raise ExtractorError(
623                 u'Not an SWF file; header is %r' % file_contents[:3])
624         if file_contents[:1] == b'C':
625             content = zlib.decompress(file_contents[8:])
626         else:
627             raise NotImplementedError(u'Unsupported compression format %r' %
628                                       file_contents[:1])
629
630         def extract_tags(content):
631             pos = 0
632             while pos < len(content):
633                 header16 = struct.unpack('<H', content[pos:pos+2])[0]
634                 pos += 2
635                 tag_code = header16 >> 6
636                 tag_len = header16 & 0x3f
637                 if tag_len == 0x3f:
638                     tag_len = struct.unpack('<I', content[pos:pos+4])[0]
639                     pos += 4
640                 assert pos+tag_len <= len(content)
641                 yield (tag_code, content[pos:pos+tag_len])
642                 pos += tag_len
643
644         code_tag = next(tag
645                         for tag_code, tag in extract_tags(content)
646                         if tag_code == 82)
647         p = code_tag.index(b'\0', 4) + 1
648         code_reader = io.BytesIO(code_tag[p:])
649
650         # Parse ABC (AVM2 ByteCode)
651         def read_int(reader=None):
652             if reader is None:
653                 reader = code_reader
654             res = 0
655             shift = 0
656             for _ in range(5):
657                 buf = reader.read(1)
658                 assert len(buf) == 1
659                 b = struct.unpack('<B', buf)[0]
660                 res = res | ((b & 0x7f) << shift)
661                 if b & 0x80 == 0:
662                     break
663                 shift += 7
664             return res
665
666         def u30(reader=None):
667             res = read_int(reader)
668             assert res & 0xf0000000 == 0
669             return res
670         u32 = read_int
671
672         def s32(reader=None):
673             v = read_int(reader)
674             if v & 0x80000000 != 0:
675                 v = - ((v ^ 0xffffffff) + 1)
676             return v
677
678         def read_string(reader=None):
679             if reader is None:
680                 reader = code_reader
681             slen = u30(reader)
682             resb = reader.read(slen)
683             assert len(resb) == slen
684             return resb.decode('utf-8')
685
686         def read_bytes(count, reader=None):
687             if reader is None:
688                 reader = code_reader
689             resb = reader.read(count)
690             assert len(resb) == count
691             return resb
692
693         def read_byte(reader=None):
694             resb = read_bytes(1, reader=reader)
695             res = struct.unpack('<B', resb)[0]
696             return res
697
698         # minor_version + major_version
699         read_bytes(2 + 2)
700
701         # Constant pool
702         int_count = u30()
703         for _c in range(1, int_count):
704             s32()
705         uint_count = u30()
706         for _c in range(1, uint_count):
707             u32()
708         double_count = u30()
709         read_bytes((double_count-1) * 8)
710         string_count = u30()
711         constant_strings = [u'']
712         for _c in range(1, string_count):
713             s = read_string()
714             constant_strings.append(s)
715         namespace_count = u30()
716         for _c in range(1, namespace_count):
717             read_bytes(1)  # kind
718             u30()  # name
719         ns_set_count = u30()
720         for _c in range(1, ns_set_count):
721             count = u30()
722             for _c2 in range(count):
723                 u30()
724         multiname_count = u30()
725         MULTINAME_SIZES = {
726             0x07: 2,  # QName
727             0x0d: 2,  # QNameA
728             0x0f: 1,  # RTQName
729             0x10: 1,  # RTQNameA
730             0x11: 0,  # RTQNameL
731             0x12: 0,  # RTQNameLA
732             0x09: 2,  # Multiname
733             0x0e: 2,  # MultinameA
734             0x1b: 1,  # MultinameL
735             0x1c: 1,  # MultinameLA
736         }
737         multinames = [u'']
738         for _c in range(1, multiname_count):
739             kind = u30()
740             assert kind in MULTINAME_SIZES, u'Invalid multiname kind %r' % kind
741             if kind == 0x07:
742                 u30()  # namespace_idx
743                 name_idx = u30()
744                 multinames.append(constant_strings[name_idx])
745             else:
746                 multinames.append('[MULTINAME kind: %d]' % kind)
747                 for _c2 in range(MULTINAME_SIZES[kind]):
748                     u30()
749
750         # Methods
751         method_count = u30()
752         MethodInfo = collections.namedtuple(
753             'MethodInfo',
754             ['NEED_ARGUMENTS', 'NEED_REST'])
755         method_infos = []
756         for method_id in range(method_count):
757             param_count = u30()
758             u30()  # return type
759             for _ in range(param_count):
760                 u30()  # param type
761             u30()  # name index (always 0 for youtube)
762             flags = read_byte()
763             if flags & 0x08 != 0:
764                 # Options present
765                 option_count = u30()
766                 for c in range(option_count):
767                     u30()  # val
768                     read_bytes(1)  # kind
769             if flags & 0x80 != 0:
770                 # Param names present
771                 for _ in range(param_count):
772                     u30()  # param name
773             mi = MethodInfo(flags & 0x01 != 0, flags & 0x04 != 0)
774             method_infos.append(mi)
775
776         # Metadata
777         metadata_count = u30()
778         for _c in range(metadata_count):
779             u30()  # name
780             item_count = u30()
781             for _c2 in range(item_count):
782                 u30()  # key
783                 u30()  # value
784
785         def parse_traits_info():
786             trait_name_idx = u30()
787             kind_full = read_byte()
788             kind = kind_full & 0x0f
789             attrs = kind_full >> 4
790             methods = {}
791             if kind in [0x00, 0x06]:  # Slot or Const
792                 u30()  # Slot id
793                 u30()  # type_name_idx
794                 vindex = u30()
795                 if vindex != 0:
796                     read_byte()  # vkind
797             elif kind in [0x01, 0x02, 0x03]:  # Method / Getter / Setter
798                 u30()  # disp_id
799                 method_idx = u30()
800                 methods[multinames[trait_name_idx]] = method_idx
801             elif kind == 0x04:  # Class
802                 u30()  # slot_id
803                 u30()  # classi
804             elif kind == 0x05:  # Function
805                 u30()  # slot_id
806                 function_idx = u30()
807                 methods[function_idx] = multinames[trait_name_idx]
808             else:
809                 raise ExtractorError(u'Unsupported trait kind %d' % kind)
810
811             if attrs & 0x4 != 0:  # Metadata present
812                 metadata_count = u30()
813                 for _c3 in range(metadata_count):
814                     u30()  # metadata index
815
816             return methods
817
818         # Classes
819         TARGET_CLASSNAME = u'SignatureDecipher'
820         searched_idx = multinames.index(TARGET_CLASSNAME)
821         searched_class_id = None
822         class_count = u30()
823         for class_id in range(class_count):
824             name_idx = u30()
825             if name_idx == searched_idx:
826                 # We found the class we're looking for!
827                 searched_class_id = class_id
828             u30()  # super_name idx
829             flags = read_byte()
830             if flags & 0x08 != 0:  # Protected namespace is present
831                 u30()  # protected_ns_idx
832             intrf_count = u30()
833             for _c2 in range(intrf_count):
834                 u30()
835             u30()  # iinit
836             trait_count = u30()
837             for _c2 in range(trait_count):
838                 parse_traits_info()
839
840         if searched_class_id is None:
841             raise ExtractorError(u'Target class %r not found' %
842                                  TARGET_CLASSNAME)
843
844         method_names = {}
845         method_idxs = {}
846         for class_id in range(class_count):
847             u30()  # cinit
848             trait_count = u30()
849             for _c2 in range(trait_count):
850                 trait_methods = parse_traits_info()
851                 if class_id == searched_class_id:
852                     method_names.update(trait_methods.items())
853                     method_idxs.update(dict(
854                         (idx, name)
855                         for name, idx in trait_methods.items()))
856
857         # Scripts
858         script_count = u30()
859         for _c in range(script_count):
860             u30()  # init
861             trait_count = u30()
862             for _c2 in range(trait_count):
863                 parse_traits_info()
864
865         # Method bodies
866         method_body_count = u30()
867         Method = collections.namedtuple('Method', ['code', 'local_count'])
868         methods = {}
869         for _c in range(method_body_count):
870             method_idx = u30()
871             u30()  # max_stack
872             local_count = u30()
873             u30()  # init_scope_depth
874             u30()  # max_scope_depth
875             code_length = u30()
876             code = read_bytes(code_length)
877             if method_idx in method_idxs:
878                 m = Method(code, local_count)
879                 methods[method_idxs[method_idx]] = m
880             exception_count = u30()
881             for _c2 in range(exception_count):
882                 u30()  # from
883                 u30()  # to
884                 u30()  # target
885                 u30()  # exc_type
886                 u30()  # var_name
887             trait_count = u30()
888             for _c2 in range(trait_count):
889                 parse_traits_info()
890
891         assert p + code_reader.tell() == len(code_tag)
892         assert len(methods) == len(method_idxs)
893
894         method_pyfunctions = {}
895
896         def extract_function(func_name):
897             if func_name in method_pyfunctions:
898                 return method_pyfunctions[func_name]
899             if func_name not in methods:
900                 raise ExtractorError(u'Cannot find function %r' % func_name)
901             m = methods[func_name]
902
903             def resfunc(args):
904                 registers = ['(this)'] + list(args) + [None] * m.local_count
905                 stack = []
906                 coder = io.BytesIO(m.code)
907                 while True:
908                     opcode = struct.unpack('!B', coder.read(1))[0]
909                     if opcode == 36:  # pushbyte
910                         v = struct.unpack('!B', coder.read(1))[0]
911                         stack.append(v)
912                     elif opcode == 44:  # pushstring
913                         idx = u30(coder)
914                         stack.append(constant_strings[idx])
915                     elif opcode == 48:  # pushscope
916                         # We don't implement the scope register, so we'll just
917                         # ignore the popped value
918                         stack.pop()
919                     elif opcode == 70:  # callproperty
920                         index = u30(coder)
921                         mname = multinames[index]
922                         arg_count = u30(coder)
923                         args = list(reversed(
924                             [stack.pop() for _ in range(arg_count)]))
925                         obj = stack.pop()
926                         if mname == u'split':
927                             assert len(args) == 1
928                             assert isinstance(args[0], compat_str)
929                             assert isinstance(obj, compat_str)
930                             if args[0] == u'':
931                                 res = list(obj)
932                             else:
933                                 res = obj.split(args[0])
934                             stack.append(res)
935                         elif mname == u'slice':
936                             assert len(args) == 1
937                             assert isinstance(args[0], int)
938                             assert isinstance(obj, list)
939                             res = obj[args[0]:]
940                             stack.append(res)
941                         elif mname == u'join':
942                             assert len(args) == 1
943                             assert isinstance(args[0], compat_str)
944                             assert isinstance(obj, list)
945                             res = args[0].join(obj)
946                             stack.append(res)
947                         elif mname in method_pyfunctions:
948                             stack.append(method_pyfunctions[mname](args))
949                         else:
950                             raise NotImplementedError(
951                                 u'Unsupported property %r on %r'
952                                 % (mname, obj))
953                     elif opcode == 72:  # returnvalue
954                         res = stack.pop()
955                         return res
956                     elif opcode == 79:  # callpropvoid
957                         index = u30(coder)
958                         mname = multinames[index]
959                         arg_count = u30(coder)
960                         args = list(reversed(
961                             [stack.pop() for _ in range(arg_count)]))
962                         obj = stack.pop()
963                         if mname == u'reverse':
964                             assert isinstance(obj, list)
965                             obj.reverse()
966                         else:
967                             raise NotImplementedError(
968                                 u'Unsupported (void) property %r on %r'
969                                 % (mname, obj))
970                     elif opcode == 93:  # findpropstrict
971                         index = u30(coder)
972                         mname = multinames[index]
973                         res = extract_function(mname)
974                         stack.append(res)
975                     elif opcode == 97:  # setproperty
976                         index = u30(coder)
977                         value = stack.pop()
978                         idx = stack.pop()
979                         obj = stack.pop()
980                         assert isinstance(obj, list)
981                         assert isinstance(idx, int)
982                         obj[idx] = value
983                     elif opcode == 98:  # getlocal
984                         index = u30(coder)
985                         stack.append(registers[index])
986                     elif opcode == 99:  # setlocal
987                         index = u30(coder)
988                         value = stack.pop()
989                         registers[index] = value
990                     elif opcode == 102:  # getproperty
991                         index = u30(coder)
992                         pname = multinames[index]
993                         if pname == u'length':
994                             obj = stack.pop()
995                             assert isinstance(obj, list)
996                             stack.append(len(obj))
997                         else:  # Assume attribute access
998                             idx = stack.pop()
999                             assert isinstance(idx, int)
1000                             obj = stack.pop()
1001                             assert isinstance(obj, list)
1002                             stack.append(obj[idx])
1003                     elif opcode == 128:  # coerce
1004                         u30(coder)
1005                     elif opcode == 133:  # coerce_s
1006                         assert isinstance(stack[-1], (type(None), compat_str))
1007                     elif opcode == 164:  # modulo
1008                         value2 = stack.pop()
1009                         value1 = stack.pop()
1010                         res = value1 % value2
1011                         stack.append(res)
1012                     elif opcode == 208:  # getlocal_0
1013                         stack.append(registers[0])
1014                     elif opcode == 209:  # getlocal_1
1015                         stack.append(registers[1])
1016                     elif opcode == 210:  # getlocal_2
1017                         stack.append(registers[2])
1018                     elif opcode == 211:  # getlocal_3
1019                         stack.append(registers[3])
1020                     elif opcode == 214:  # setlocal_2
1021                         registers[2] = stack.pop()
1022                     elif opcode == 215:  # setlocal_3
1023                         registers[3] = stack.pop()
1024                     else:
1025                         raise NotImplementedError(
1026                             u'Unsupported opcode %d' % opcode)
1027
1028             method_pyfunctions[func_name] = resfunc
1029             return resfunc
1030
1031         initial_function = extract_function(u'decipher')
1032         return lambda s: initial_function([s])
1033
1034     def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
1035         """Turn the encrypted s field into a working signature"""
1036
1037         if player_url is not None:
1038             try:
1039                 if player_url not in self._player_cache:
1040                     func = self._extract_signature_function(
1041                         video_id, player_url, len(s)
1042                     )
1043                     self._player_cache[player_url] = func
1044                 func = self._player_cache[player_url]
1045                 if self._downloader.params.get('youtube_print_sig_code'):
1046                     self._print_sig_code(func, len(s))
1047                 return func(s)
1048             except Exception:
1049                 tb = traceback.format_exc()
1050                 self._downloader.report_warning(
1051                     u'Automatic signature extraction failed: ' + tb)
1052
1053             self._downloader.report_warning(
1054                 u'Warning: Falling back to static signature algorithm')
1055
1056         return self._static_decrypt_signature(
1057             s, video_id, player_url, age_gate)
1058
1059     def _static_decrypt_signature(self, s, video_id, player_url, age_gate):
1060         if age_gate:
1061             # The videos with age protection use another player, so the
1062             # algorithms can be different.
1063             if len(s) == 86:
1064                 return s[2:63] + s[82] + s[64:82] + s[63]
1065
1066         if len(s) == 93:
1067             return s[86:29:-1] + s[88] + s[28:5:-1]
1068         elif len(s) == 92:
1069             return s[25] + s[3:25] + s[0] + s[26:42] + s[79] + s[43:79] + s[91] + s[80:83]
1070         elif len(s) == 91:
1071             return s[84:27:-1] + s[86] + s[26:5:-1]
1072         elif len(s) == 90:
1073             return s[25] + s[3:25] + s[2] + s[26:40] + s[77] + s[41:77] + s[89] + s[78:81]
1074         elif len(s) == 89:
1075             return s[84:78:-1] + s[87] + s[77:60:-1] + s[0] + s[59:3:-1]
1076         elif len(s) == 88:
1077             return s[7:28] + s[87] + s[29:45] + s[55] + s[46:55] + s[2] + s[56:87] + s[28]
1078         elif len(s) == 87:
1079             return s[6:27] + s[4] + s[28:39] + s[27] + s[40:59] + s[2] + s[60:]
1080         elif len(s) == 86:
1081             return s[80:72:-1] + s[16] + s[71:39:-1] + s[72] + s[38:16:-1] + s[82] + s[15::-1]
1082         elif len(s) == 85:
1083             return s[3:11] + s[0] + s[12:55] + s[84] + s[56:84]
1084         elif len(s) == 84:
1085             return s[78:70:-1] + s[14] + s[69:37:-1] + s[70] + s[36:14:-1] + s[80] + s[:14][::-1]
1086         elif len(s) == 83:
1087             return s[80:63:-1] + s[0] + s[62:0:-1] + s[63]
1088         elif len(s) == 82:
1089             return s[80:73:-1] + s[81] + s[72:54:-1] + s[2] + s[53:43:-1] + s[0] + s[42:2:-1] + s[43] + s[1] + s[54]
1090         elif len(s) == 81:
1091             return s[56] + s[79:56:-1] + s[41] + s[55:41:-1] + s[80] + s[40:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
1092         elif len(s) == 80:
1093             return s[1:19] + s[0] + s[20:68] + s[19] + s[69:80]
1094         elif len(s) == 79:
1095             return s[54] + s[77:54:-1] + s[39] + s[53:39:-1] + s[78] + s[38:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
1096
1097         else:
1098             raise ExtractorError(u'Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s)))
1099
1100     def _get_available_subtitles(self, video_id):
1101         try:
1102             sub_list = self._download_webpage(
1103                 'http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
1104                 video_id, note=False)
1105         except ExtractorError as err:
1106             self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err))
1107             return {}
1108         lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list)
1109
1110         sub_lang_list = {}
1111         for l in lang_list:
1112             lang = l[1]
1113             params = compat_urllib_parse.urlencode({
1114                 'lang': lang,
1115                 'v': video_id,
1116                 'fmt': self._downloader.params.get('subtitlesformat'),
1117             })
1118             url = u'http://www.youtube.com/api/timedtext?' + params
1119             sub_lang_list[lang] = url
1120         if not sub_lang_list:
1121             self._downloader.report_warning(u'video doesn\'t have subtitles')
1122             return {}
1123         return sub_lang_list
1124
1125     def _get_available_automatic_caption(self, video_id, webpage):
1126         """We need the webpage for getting the captions url, pass it as an
1127            argument to speed up the process."""
1128         sub_format = self._downloader.params.get('subtitlesformat')
1129         self.to_screen(u'%s: Looking for automatic captions' % video_id)
1130         mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
1131         err_msg = u'Couldn\'t find automatic captions for %s' % video_id
1132         if mobj is None:
1133             self._downloader.report_warning(err_msg)
1134             return {}
1135         player_config = json.loads(mobj.group(1))
1136         try:
1137             args = player_config[u'args']
1138             caption_url = args[u'ttsurl']
1139             timestamp = args[u'timestamp']
1140             # We get the available subtitles
1141             list_params = compat_urllib_parse.urlencode({
1142                 'type': 'list',
1143                 'tlangs': 1,
1144                 'asrs': 1,
1145             })
1146             list_url = caption_url + '&' + list_params
1147             list_page = self._download_webpage(list_url, video_id)
1148             caption_list = xml.etree.ElementTree.fromstring(list_page.encode('utf-8'))
1149             original_lang_node = caption_list.find('track')
1150             if original_lang_node.attrib.get('kind') != 'asr' :
1151                 self._downloader.report_warning(u'Video doesn\'t have automatic captions')
1152                 return {}
1153             original_lang = original_lang_node.attrib['lang_code']
1154
1155             sub_lang_list = {}
1156             for lang_node in caption_list.findall('target'):
1157                 sub_lang = lang_node.attrib['lang_code']
1158                 params = compat_urllib_parse.urlencode({
1159                     'lang': original_lang,
1160                     'tlang': sub_lang,
1161                     'fmt': sub_format,
1162                     'ts': timestamp,
1163                     'kind': 'asr',
1164                 })
1165                 sub_lang_list[sub_lang] = caption_url + '&' + params
1166             return sub_lang_list
1167         # An extractor error can be raise by the download process if there are
1168         # no automatic captions but there are subtitles
1169         except (KeyError, ExtractorError):
1170             self._downloader.report_warning(err_msg)
1171             return {}
1172
1173     def _print_formats(self, formats):
1174         print('Available formats:')
1175         for x in formats:
1176             print('%s\t:\t%s\t[%s]%s' %(x, self._video_extensions.get(x, 'flv'),
1177                                         self._video_dimensions.get(x, '???'),
1178                                         ' ('+self._special_itags[x]+')' if x in self._special_itags else ''))
1179
1180     def _extract_id(self, url):
1181         mobj = re.match(self._VALID_URL, url, re.VERBOSE)
1182         if mobj is None:
1183             raise ExtractorError(u'Invalid URL: %s' % url)
1184         video_id = mobj.group(2)
1185         return video_id
1186
    def _get_video_url_list(self, url_map):
        """
        Transform a dictionary in the format {itag:url} to a list of (itag, url)
        with the requested formats.

        Returns None when the user only asked to list the formats; raises
        ExtractorError when nothing in url_map matches a known/requested
        format.
        """
        req_format = self._downloader.params.get('format', None)
        format_limit = self._downloader.params.get('format_limit', None)
        # Itag list ordered best-first; the "prefer free" variant ranks free
        # container formats higher.
        available_formats = self._available_formats_prefer_free if self._downloader.params.get('prefer_free_formats', False) else self._available_formats
        if format_limit is not None and format_limit in available_formats:
            # Drop every format ranked better than the requested limit.
            format_list = available_formats[available_formats.index(format_limit):]
        else:
            format_list = available_formats
        # Keep only the formats this video actually offers, best-first.
        existing_formats = [x for x in format_list if x in url_map]
        if len(existing_formats) == 0:
            raise ExtractorError(u'no known formats available for video')
        if self._downloader.params.get('listformats', None):
            self._print_formats(existing_formats)
            return
        if req_format is None or req_format == 'best':
            video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
        elif req_format == 'worst':
            video_url_list = [(existing_formats[-1], url_map[existing_formats[-1]])] # worst quality
        elif req_format in ('-1', 'all'):
            video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
        else:
            # Specific formats. We pick the first in a slash-delimeted sequence.
            # Format can be specified as itag or 'mp4' or 'flv' etc. We pick the highest quality
            # available in the specified format. For example,
            # if '1/2/3/4' is requested and '2' and '4' are available, we pick '2'.
            # if '1/mp4/3/4' is requested and '1' and '5' (is a mp4) are available, we pick '1'.
            # if '1/mp4/3/4' is requested and '4' and '5' (is a mp4) are available, we pick '5'.
            req_formats = req_format.split('/')
            video_url_list = None
            for rf in req_formats:
                if rf in url_map:
                    # Token is a concrete itag the video offers.
                    video_url_list = [(rf, url_map[rf])]
                    break
                if rf in self._video_formats_map:
                    # Token is a container name (e.g. 'mp4'): try its itags
                    # in quality order.
                    for srf in self._video_formats_map[rf]:
                        if srf in url_map:
                            video_url_list = [(srf, url_map[srf])]
                            break
                    else:
                        # Inner loop found nothing: move on to the next
                        # requested token.
                        continue
                    # Inner loop broke, i.e. a format was found: stop.
                    break
            if video_url_list is None:
                raise ExtractorError(u'requested format not available')
        return video_url_list
1235
1236     def _extract_from_m3u8(self, manifest_url, video_id):
1237         url_map = {}
1238         def _get_urls(_manifest):
1239             lines = _manifest.split('\n')
1240             urls = filter(lambda l: l and not l.startswith('#'),
1241                             lines)
1242             return urls
1243         manifest = self._download_webpage(manifest_url, video_id, u'Downloading formats manifest')
1244         formats_urls = _get_urls(manifest)
1245         for format_url in formats_urls:
1246             itag = self._search_regex(r'itag/(\d+?)/', format_url, 'itag')
1247             url_map[itag] = format_url
1248         return url_map
1249
1250     def _real_extract(self, url):
1251         if re.match(r'(?:https?://)?[^/]+/watch\?feature=[a-z_]+$', url):
1252             self._downloader.report_warning(u'Did you forget to quote the URL? Remember that & is a meta-character in most shells, so you want to put the URL in quotes, like  youtube-dl \'http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc\' (or simply  youtube-dl BaW_jenozKc  ).')
1253
1254         # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1255         mobj = re.search(self._NEXT_URL_RE, url)
1256         if mobj:
1257             url = 'https://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/')
1258         video_id = self._extract_id(url)
1259
1260         # Get video webpage
1261         self.report_video_webpage_download(video_id)
1262         url = 'https://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
1263         request = compat_urllib_request.Request(url)
1264         try:
1265             video_webpage_bytes = compat_urllib_request.urlopen(request).read()
1266         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1267             raise ExtractorError(u'Unable to download video webpage: %s' % compat_str(err))
1268
1269         video_webpage = video_webpage_bytes.decode('utf-8', 'ignore')
1270
1271         # Attempt to extract SWF player URL
1272         mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
1273         if mobj is not None:
1274             player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1275         else:
1276             player_url = None
1277
1278         # Get video info
1279         self.report_video_info_webpage_download(video_id)
1280         if re.search(r'player-age-gate-content">', video_webpage) is not None:
1281             self.report_age_confirmation()
1282             age_gate = True
1283             # We simulate the access to the video from www.youtube.com/v/{video_id}
1284             # this can be viewed without login into Youtube
1285             data = compat_urllib_parse.urlencode({'video_id': video_id,
1286                                                   'el': 'embedded',
1287                                                   'gl': 'US',
1288                                                   'hl': 'en',
1289                                                   'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1290                                                   'asv': 3,
1291                                                   'sts':'1588',
1292                                                   })
1293             video_info_url = 'https://www.youtube.com/get_video_info?' + data
1294             video_info_webpage = self._download_webpage(video_info_url, video_id,
1295                                     note=False,
1296                                     errnote='unable to download video info webpage')
1297             video_info = compat_parse_qs(video_info_webpage)
1298         else:
1299             age_gate = False
1300             for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
1301                 video_info_url = ('https://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
1302                         % (video_id, el_type))
1303                 video_info_webpage = self._download_webpage(video_info_url, video_id,
1304                                         note=False,
1305                                         errnote='unable to download video info webpage')
1306                 video_info = compat_parse_qs(video_info_webpage)
1307                 if 'token' in video_info:
1308                     break
1309         if 'token' not in video_info:
1310             if 'reason' in video_info:
1311                 raise ExtractorError(u'YouTube said: %s' % video_info['reason'][0], expected=True)
1312             else:
1313                 raise ExtractorError(u'"token" parameter not in video info for unknown reason')
1314
1315         # Check for "rental" videos
1316         if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
1317             raise ExtractorError(u'"rental" videos not supported')
1318
1319         # Start extracting information
1320         self.report_information_extraction(video_id)
1321
1322         # uploader
1323         if 'author' not in video_info:
1324             raise ExtractorError(u'Unable to extract uploader name')
1325         video_uploader = compat_urllib_parse.unquote_plus(video_info['author'][0])
1326
1327         # uploader_id
1328         video_uploader_id = None
1329         mobj = re.search(r'<link itemprop="url" href="http://www.youtube.com/(?:user|channel)/([^"]+)">', video_webpage)
1330         if mobj is not None:
1331             video_uploader_id = mobj.group(1)
1332         else:
1333             self._downloader.report_warning(u'unable to extract uploader nickname')
1334
1335         # title
1336         if 'title' not in video_info:
1337             raise ExtractorError(u'Unable to extract video title')
1338         video_title = compat_urllib_parse.unquote_plus(video_info['title'][0])
1339
1340         # thumbnail image
1341         # We try first to get a high quality image:
1342         m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
1343                             video_webpage, re.DOTALL)
1344         if m_thumb is not None:
1345             video_thumbnail = m_thumb.group(1)
1346         elif 'thumbnail_url' not in video_info:
1347             self._downloader.report_warning(u'unable to extract video thumbnail')
1348             video_thumbnail = None
1349         else:   # don't panic if we can't find it
1350             video_thumbnail = compat_urllib_parse.unquote_plus(video_info['thumbnail_url'][0])
1351
1352         # upload date
1353         upload_date = None
1354         mobj = re.search(r'id="eow-date.*?>(.*?)</span>', video_webpage, re.DOTALL)
1355         if mobj is not None:
1356             upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
1357             upload_date = unified_strdate(upload_date)
1358
1359         # description
1360         video_description = get_element_by_id("eow-description", video_webpage)
1361         if video_description:
1362             video_description = clean_html(video_description)
1363         else:
1364             fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
1365             if fd_mobj:
1366                 video_description = unescapeHTML(fd_mobj.group(1))
1367             else:
1368                 video_description = u''
1369
1370         # subtitles
1371         video_subtitles = self.extract_subtitles(video_id, video_webpage)
1372
1373         if self._downloader.params.get('listsubtitles', False):
1374             self._list_available_subtitles(video_id, video_webpage)
1375             return
1376
1377         if 'length_seconds' not in video_info:
1378             self._downloader.report_warning(u'unable to extract video duration')
1379             video_duration = ''
1380         else:
1381             video_duration = compat_urllib_parse.unquote_plus(video_info['length_seconds'][0])
1382
1383         # Decide which formats to download
1384
1385         try:
1386             mobj = re.search(r';ytplayer.config = ({.*?});', video_webpage)
1387             if not mobj:
1388                 raise ValueError('Could not find vevo ID')
1389             info = json.loads(mobj.group(1))
1390             args = info['args']
1391             # Easy way to know if the 's' value is in url_encoded_fmt_stream_map
1392             # this signatures are encrypted
1393             if 'url_encoded_fmt_stream_map' not in args:
1394                 raise ValueError(u'No stream_map present')  # caught below
1395             m_s = re.search(r'[&,]s=', args['url_encoded_fmt_stream_map'])
1396             if m_s is not None:
1397                 self.to_screen(u'%s: Encrypted signatures detected.' % video_id)
1398                 video_info['url_encoded_fmt_stream_map'] = [args['url_encoded_fmt_stream_map']]
1399             m_s = re.search(r'[&,]s=', args.get('adaptive_fmts', u''))
1400             if m_s is not None:
1401                 if 'url_encoded_fmt_stream_map' in video_info:
1402                     video_info['url_encoded_fmt_stream_map'][0] += ',' + args['adaptive_fmts']
1403                 else:
1404                     video_info['url_encoded_fmt_stream_map'] = [args['adaptive_fmts']]
1405             elif 'adaptive_fmts' in video_info:
1406                 if 'url_encoded_fmt_stream_map' in video_info:
1407                     video_info['url_encoded_fmt_stream_map'][0] += ',' + video_info['adaptive_fmts'][0]
1408                 else:
1409                     video_info['url_encoded_fmt_stream_map'] = video_info['adaptive_fmts']
1410         except ValueError:
1411             pass
1412
1413         if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1414             self.report_rtmp_download()
1415             video_url_list = [(None, video_info['conn'][0])]
1416         elif 'url_encoded_fmt_stream_map' in video_info and len(video_info['url_encoded_fmt_stream_map']) >= 1:
1417             if 'rtmpe%3Dyes' in video_info['url_encoded_fmt_stream_map'][0]:
1418                 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
1419             url_map = {}
1420             for url_data_str in video_info['url_encoded_fmt_stream_map'][0].split(','):
1421                 url_data = compat_parse_qs(url_data_str)
1422                 if 'itag' in url_data and 'url' in url_data:
1423                     url = url_data['url'][0]
1424                     if 'sig' in url_data:
1425                         url += '&signature=' + url_data['sig'][0]
1426                     elif 's' in url_data:
1427                         encrypted_sig = url_data['s'][0]
1428                         if self._downloader.params.get('verbose'):
1429                             if age_gate:
1430                                 if player_url is None:
1431                                     player_version = 'unknown'
1432                                 else:
1433                                     player_version = self._search_regex(
1434                                         r'-(.+)\.swf$', player_url,
1435                                         u'flash player', fatal=False)
1436                                 player_desc = 'flash player %s' % player_version
1437                             else:
1438                                 player_version = self._search_regex(
1439                                     r'html5player-(.+?)\.js', video_webpage,
1440                                     'html5 player', fatal=False)
1441                                 player_desc = u'html5 player %s' % player_version
1442
1443                             parts_sizes = u'.'.join(compat_str(len(part)) for part in encrypted_sig.split('.'))
1444                             self.to_screen(u'encrypted signature length %d (%s), itag %s, %s' %
1445                                 (len(encrypted_sig), parts_sizes, url_data['itag'][0], player_desc))
1446
1447                         if not age_gate:
1448                             jsplayer_url_json = self._search_regex(
1449                                 r'"assets":.+?"js":\s*("[^"]+")',
1450                                 video_webpage, u'JS player URL')
1451                             player_url = json.loads(jsplayer_url_json)
1452
1453                         signature = self._decrypt_signature(
1454                             encrypted_sig, video_id, player_url, age_gate)
1455                         url += '&signature=' + signature
1456                     if 'ratebypass' not in url:
1457                         url += '&ratebypass=yes'
1458                     url_map[url_data['itag'][0]] = url
1459             video_url_list = self._get_video_url_list(url_map)
1460             if not video_url_list:
1461                 return
1462         elif video_info.get('hlsvp'):
1463             manifest_url = video_info['hlsvp'][0]
1464             url_map = self._extract_from_m3u8(manifest_url, video_id)
1465             video_url_list = self._get_video_url_list(url_map)
1466             if not video_url_list:
1467                 return
1468
1469         else:
1470             raise ExtractorError(u'no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
1471
1472         results = []
1473         for format_param, video_real_url in video_url_list:
1474             # Extension
1475             video_extension = self._video_extensions.get(format_param, 'flv')
1476
1477             video_format = '{0} - {1}{2}'.format(format_param if format_param else video_extension,
1478                                               self._video_dimensions.get(format_param, '???'),
1479                                               ' ('+self._special_itags[format_param]+')' if format_param in self._special_itags else '')
1480
1481             results.append({
1482                 'id':       video_id,
1483                 'url':      video_real_url,
1484                 'uploader': video_uploader,
1485                 'uploader_id': video_uploader_id,
1486                 'upload_date':  upload_date,
1487                 'title':    video_title,
1488                 'ext':      video_extension,
1489                 'format':   video_format,
1490                 'thumbnail':    video_thumbnail,
1491                 'description':  video_description,
1492                 'player_url':   player_url,
1493                 'subtitles':    video_subtitles,
1494                 'duration':     video_duration
1495             })
1496         return results
1497
class YoutubePlaylistIE(InfoExtractor):
    IE_DESC = u'YouTube.com playlists'
    _VALID_URL = r"""(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        youtube\.com/
                        (?:
                           (?:course|view_play_list|my_playlists|artist|playlist|watch)
                           \? (?:.*?&)*? (?:p|a|list)=
                        |  p/
                        )
                        ((?:PL|EC|UU|FL)?[0-9A-Za-z-_]{10,})
                        .*
                     |
                        ((?:PL|EC|UU|FL)[0-9A-Za-z-_]{10,})
                     )"""
    _TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/playlists/%s?max-results=%i&start-index=%i&v=2&alt=json&safeSearch=none'
    _MAX_RESULTS = 50
    IE_NAME = u'youtube:playlist'

    @classmethod
    def suitable(cls, url):
        """Return True if this IE can handle the given URL."""
        # _VALID_URL uses verbose-mode whitespace, so re.VERBOSE is required.
        return re.match(cls._VALID_URL, url, re.VERBOSE) is not None

    def _real_extract(self, url):
        """Collect every video of the playlist by paging through the GData API."""
        match = re.match(self._VALID_URL, url, re.VERBOSE)
        if match is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        # Either alternative of _VALID_URL may have captured the playlist id.
        playlist_id = match.group(1) or match.group(2)
        indexed_videos = []

        for page_num in itertools.count(1):
            start_index = self._MAX_RESULTS * (page_num - 1) + 1
            if start_index >= 1000:
                self._downloader.report_warning(u'Max number of results reached')
                break
            url = self._TEMPLATE_URL % (playlist_id, self._MAX_RESULTS, start_index)
            page = self._download_webpage(url, playlist_id, u'Downloading page #%s' % page_num)

            try:
                response = json.loads(page)
            except ValueError as err:
                raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))

            if 'feed' not in response:
                raise ExtractorError(u'Got a malformed response from YouTube API')
            playlist_title = response['feed']['title']['$t']
            if 'entry' not in response['feed']:
                # Number of videos is a multiple of self._MAX_RESULTS
                break

            for entry in response['feed']['entry']:
                position = entry['yt$position']['$t']
                if 'media$group' in entry and 'yt$videoid' in entry['media$group']:
                    video_url = 'https://www.youtube.com/watch?v=' + entry['media$group']['yt$videoid']['$t']
                    indexed_videos.append((position, video_url))

        # Restore the playlist ordering before building the result list.
        video_urls = [pair[1] for pair in sorted(indexed_videos)]

        url_results = [self.url_result(vurl, 'Youtube') for vurl in video_urls]
        return [self.playlist_result(url_results, playlist_id, playlist_title)]
1565
1566
class YoutubeChannelIE(InfoExtractor):
    IE_DESC = u'YouTube.com channels'
    _VALID_URL = r"^(?:https?://)?(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/([0-9A-Za-z_-]+)"
    _TEMPLATE_URL = 'http://www.youtube.com/channel/%s/videos?sort=da&flow=list&view=0&page=%s&gl=US&hl=en'
    _MORE_PAGES_INDICATOR = 'yt-uix-load-more'
    _MORE_PAGES_URL = 'http://www.youtube.com/c4_browse_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s'
    IE_NAME = u'youtube:channel'

    def extract_videos_from_page(self, page):
        """Return the unique video ids linked from *page*, in order of first appearance."""
        seen = set()
        video_ids = []
        for match in re.finditer(r'href="/watch\?v=([0-9A-Za-z_-]+)&?', page):
            video_id = match.group(1)
            if video_id not in seen:
                seen.add(video_id)
                video_ids.append(video_id)
        return video_ids

    def _real_extract(self, url):
        """Collect every video of the channel and return it as one playlist."""
        match = re.match(self._VALID_URL, url)
        if match is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        channel_id = match.group(1)
        video_ids = []
        pagenum = 1

        # The first page is served as plain HTML.
        page = self._download_webpage(self._TEMPLATE_URL % (channel_id, pagenum),
                                      channel_id, u'Downloading page #%s' % pagenum)
        video_ids.extend(self.extract_videos_from_page(page))

        # Any further pages come from the JSON-based channel_ajax endpoint.
        if self._MORE_PAGES_INDICATOR in page:
            for pagenum in itertools.count(1):
                raw_page = self._download_webpage(self._MORE_PAGES_URL % (pagenum, channel_id),
                                                  channel_id, u'Downloading page #%s' % pagenum)
                page = json.loads(raw_page)
                video_ids.extend(self.extract_videos_from_page(page['content_html']))
                if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']:
                    break

        self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))

        video_urls = ['http://www.youtube.com/watch?v=%s' % video_id for video_id in video_ids]
        entries = [self.url_result(video_url, 'Youtube') for video_url in video_urls]
        return [self.playlist_result(entries, channel_id)]
1621
1622
class YoutubeUserIE(InfoExtractor):
    IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?)|ytuser:)(?!feed/)([A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
    _GDATA_PAGE_SIZE = 50
    _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'
    IE_NAME = u'youtube:user'

    @classmethod
    def suitable(cls, url):
        """Return True for user URLs that no other youtube extractor claims."""
        # _VALID_URL is very permissive, so defer to every sibling *IE class
        # in this module first; only handle URLs nobody else matches.
        if any(klass.suitable(url)
               for (name, klass) in globals().items()
               if name.endswith('IE') and klass is not cls):
            return False
        return super(YoutubeUserIE, cls).suitable(url)

    def _real_extract(self, url):
        """List all uploads of a user by paging through the GData API."""
        match = re.match(self._VALID_URL, url)
        if match is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        username = match.group(1)

        # The API limits each response (currently to 50 videos), so keep
        # requesting consecutive pages until one comes back short or empty.
        video_ids = []

        for pagenum in itertools.count(0):
            start_index = pagenum * self._GDATA_PAGE_SIZE + 1

            gdata_url = self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index)
            page = self._download_webpage(gdata_url, username,
                                          u'Downloading video ids from %d to %d' % (start_index, start_index + self._GDATA_PAGE_SIZE))

            try:
                response = json.loads(page)
            except ValueError as err:
                raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))
            if 'entry' not in response['feed']:
                # Number of videos is a multiple of self._MAX_RESULTS
                break

            # The video id is the last path component of each entry's id URL.
            page_ids = [entry['id']['$t'].split('/')[-1]
                        for entry in response['feed']['entry']]
            video_ids.extend(page_ids)

            # A page that is not completely full must be the last one; no
            # need to issue another query just to get an empty response.
            if len(page_ids) < self._GDATA_PAGE_SIZE:
                break

        urls = ['http://www.youtube.com/watch?v=%s' % video_id for video_id in video_ids]
        url_results = [self.url_result(rurl, 'Youtube') for rurl in urls]
        return [self.playlist_result(url_results, playlist_title=username)]
1687
class YoutubeSearchIE(SearchInfoExtractor):
    IE_DESC = u'YouTube.com searches'
    _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc'
    _MAX_RESULTS = 1000
    IE_NAME = u'youtube:search'
    _SEARCH_KEY = 'ytsearch'

    def report_download_page(self, query, pagenum):
        """Report attempt to download search page with given number."""
        self._downloader.to_screen(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query.

        Pages through the GData search API (50 results per page) until n
        results have been gathered or the API reports no more matches.
        Raises ExtractorError on download failure or an empty result set.
        """
        video_ids = []
        pagenum = 0
        limit = n

        while (50 * pagenum) < limit:
            self.report_download_page(query, pagenum + 1)
            result_url = self._API_URL % (compat_urllib_parse.quote_plus(query), (50 * pagenum) + 1)
            request = compat_urllib_request.Request(result_url)
            try:
                data = compat_urllib_request.urlopen(request).read().decode('utf-8')
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                raise ExtractorError(u'Unable to download API page: %s' % compat_str(err))
            api_response = json.loads(data)['data']

            # Fixed idiom: 'x not in y' instead of 'not x in y'.
            if 'items' not in api_response:
                raise ExtractorError(u'[youtube] No video results')

            video_ids.extend(video['id'] for video in api_response['items'])

            # Never ask for more results than the API says are available.
            limit = min(n, api_response['totalItems'])
            pagenum += 1

        if len(video_ids) > n:
            video_ids = video_ids[:n]
        # 'video_id' (not 'id') avoids shadowing the builtin.
        videos = [self.url_result('http://www.youtube.com/watch?v=%s' % video_id, 'Youtube')
                  for video_id in video_ids]
        return self.playlist_result(videos, query)
1729
1730
class YoutubeShowIE(InfoExtractor):
    IE_DESC = u'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://www\.youtube\.com/show/(.*)'
    IE_NAME = u'youtube:show'

    def _real_extract(self, url):
        """Return one playlist result per season of the show."""
        show_name = re.match(self._VALID_URL, url).group(1)
        webpage = self._download_webpage(url, show_name, u'Downloading show webpage')
        # Every season of a show is published as its own playlist.
        seasons = list(re.finditer(r'href="(/playlist\?list=.*?)"', webpage))
        self.to_screen(u'%s: Found %s seasons' % (show_name, len(seasons)))
        return [self.url_result('https://www.youtube.com' + season.group(1), 'YoutubePlaylist')
                for season in seasons]
1744
1745
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Base class for extractors that fetch info from
    http://www.youtube.com/feed_ajax
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
    """
    _LOGIN_REQUIRED = True
    _PAGING_STEP = 30
    # use action_load_personal_feed instead of action_load_system_feed
    _PERSONAL_FEED = False

    @property
    def _FEED_TEMPLATE(self):
        # Feed URL template with a '%s' placeholder left for the paging offset.
        if self._PERSONAL_FEED:
            action = 'action_load_personal_feed'
        else:
            action = 'action_load_system_feed'
        return 'http://www.youtube.com/feed_ajax?%s=1&feed_name=%s&paging=%%s' % (action, self._FEED_NAME)

    @property
    def IE_NAME(self):
        return u'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        self._login()

    def _real_extract(self, url):
        """Page through the feed and collect every linked video."""
        feed_entries = []
        # The step argument is available only in 2.7 or higher
        for page_idx in itertools.count(0):
            raw = self._download_webpage(self._FEED_TEMPLATE % (page_idx * self._PAGING_STEP),
                                         u'%s feed' % self._FEED_NAME,
                                         u'Downloading page %s' % page_idx)
            info = json.loads(raw)
            matches = re.finditer(r'"/watch\?v=(.*?)["&]', info['feed_html'])
            for video_id in orderedSet(m.group(1) for m in matches):
                feed_entries.append(self.url_result(video_id, 'Youtube'))
            if info['paging'] is None:
                break
        return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE)
1787
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Extractor for the logged-in user's subscriptions feed ("ytsubs")."""
    IE_DESC = u'YouTube.com subscriptions feed, "ytsubs" keyword(requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
    _FEED_NAME = 'subscriptions'  # feed_name parameter for the feed_ajax endpoint
    _PLAYLIST_TITLE = u'Youtube Subscriptions'  # title of the resulting playlist
1793
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Extractor for the logged-in user's recommended-videos feed ("ytrec")."""
    IE_DESC = u'YouTube.com recommended videos, "ytrec" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?'
    _FEED_NAME = 'recommended'  # feed_name parameter for the feed_ajax endpoint
    _PLAYLIST_TITLE = u'Youtube Recommended videos'  # title of the resulting playlist
1799
class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor):
    """Extractor for the logged-in user's "Watch Later" list ("ytwatchlater")."""
    IE_DESC = u'Youtube watch later list, "ytwatchlater" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/watch_later|:ytwatchlater'
    _FEED_NAME = 'watch_later'  # feed_name parameter for the feed_ajax endpoint
    _PLAYLIST_TITLE = u'Youtube Watch Later'  # title of the resulting playlist
    _PAGING_STEP = 100  # this feed pages in steps of 100 instead of the default 30
    _PERSONAL_FEED = True  # use action_load_personal_feed (see base class)
1807
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    IE_NAME = u'youtube:favorites'
    IE_DESC = u'YouTube.com favourite videos, "ytfav" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
    _LOGIN_REQUIRED = True

    def _real_extract(self, url):
        """Resolve the favourites page to the playlist that backs it."""
        # The favourites page embeds an ordinary playlist; scrape its id and
        # delegate the actual extraction to the playlist extractor.
        page = self._download_webpage('https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        playlist_id = self._search_regex(r'list=(.+?)["&]', page, u'favourites playlist id')
        return self.url_result(playlist_id, 'YoutubePlaylist')