[youtube] Add view_count (Fixes #1781)
[youtube-dl] / youtube_dl / extractor / youtube.py
1 # coding: utf-8
2
3 import collections
4 import errno
5 import io
6 import itertools
7 import json
8 import os.path
9 import re
10 import socket
11 import string
12 import struct
13 import traceback
14 import xml.etree.ElementTree
15 import zlib
16
17 from .common import InfoExtractor, SearchInfoExtractor
18 from .subtitles import SubtitlesInfoExtractor
19 from ..utils import (
20     compat_chr,
21     compat_http_client,
22     compat_parse_qs,
23     compat_urllib_error,
24     compat_urllib_parse,
25     compat_urllib_request,
26     compat_urlparse,
27     compat_str,
28
29     clean_html,
30     get_cachedir,
31     get_element_by_id,
32     ExtractorError,
33     unescapeHTML,
34     unified_strdate,
35     orderedSet,
36     write_json_file,
37 )
38
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _LANG_URL = r'https://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
    _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    def report_lang(self):
        """Report attempt to set language."""
        self.to_screen(u'Setting language')

    def _set_language(self):
        """Force the site language to English so page scraping is stable.

        Returns True on success, False (after a warning) on network errors.
        """
        request = compat_urllib_request.Request(self._LANG_URL)
        try:
            self.report_lang()
            compat_urllib_request.urlopen(request).read()
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self._downloader.report_warning(u'unable to set language: %s' % compat_str(err))
            return False
        return True

    def _login(self):
        """Log in via the Google accounts ServiceLogin form.

        Returns True if login succeeded, False when no credentials are
        configured or login failed.  Raises ExtractorError when
        _LOGIN_REQUIRED is set and no credentials are available.
        """
        (username, password) = self._get_login_info()
        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED:
                raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            return False

        request = compat_urllib_request.Request(self._LOGIN_URL)
        try:
            login_page = compat_urllib_request.urlopen(request).read().decode('utf-8')
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self._downloader.report_warning(u'unable to fetch login page: %s' % compat_str(err))
            return False

        # GALX is a CSRF-style token that must be echoed back in the POST.
        galx = self._search_regex(r'(?s)<input.+?name="GALX".+?value="(.+?)"',
                                  login_page, u'Login GALX parameter')

        # Log in
        login_form_strs = {
                u'continue': u'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
                u'Email': username,
                u'GALX': galx,
                u'Passwd': password,
                u'PersistentCookie': u'yes',
                u'_utf8': u'霱',
                u'bgresponse': u'js_disabled',
                u'checkConnection': u'',
                u'checkedDomains': u'youtube',
                u'dnConn': u'',
                u'pstMsg': u'0',
                u'rmShown': u'1',
                u'secTok': u'',
                u'signIn': u'Sign in',
                u'timeStmp': u'',
                u'service': u'youtube',
                u'uilel': u'3',
                u'hl': u'en_US',
        }
        # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
        # chokes on unicode
        login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k,v in login_form_strs.items())
        login_data = compat_urllib_parse.urlencode(login_form).encode('ascii')
        request = compat_urllib_request.Request(self._LOGIN_URL, login_data)
        try:
            self.report_login()
            login_results = compat_urllib_request.urlopen(request).read().decode('utf-8')
            # If the login form is served again, the credentials were wrong.
            if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
                self._downloader.report_warning(u'unable to log in: bad username or password')
                return False
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self._downloader.report_warning(u'unable to log in: %s' % compat_str(err))
            return False
        return True

    def _confirm_age(self):
        """Submit the age-verification form.

        Returns True on success; raises ExtractorError on network errors.
        """
        age_form = {
                'next_url':     '/',
                'action_confirm':   'Confirm',
                }
        # Encode the POST body to bytes for consistency with _login() (and
        # Python 3 compatibility); the form content is pure ASCII.
        age_data = compat_urllib_parse.urlencode(age_form).encode('ascii')
        request = compat_urllib_request.Request(self._AGE_URL, age_data)
        try:
            self.report_age_confirmation()
            compat_urllib_request.urlopen(request).read().decode('utf-8')
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            raise ExtractorError(u'Unable to confirm age: %s' % compat_str(err))
        return True

    def _real_initialize(self):
        """Run the session setup sequence: language, login, age check.

        Order matters: the language cookie is set first, then login, then
        age confirmation.  A language or login failure aborts the rest.
        """
        if self._downloader is None:
            return
        if not self._set_language():
            return
        if not self._login():
            return
        self._confirm_age()
138
139
class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
    IE_DESC = u'YouTube.com'
    # NOTE: matched with re.VERBOSE (see suitable()), so the '#' comments
    # below are part of the regex pattern itself, not Python comments.
    _VALID_URL = r"""^
                     (
                         (?:https?://)?                                       # http(s):// (optional)
                         (?:(?:(?:(?:\w+\.)?youtube(?:-nocookie)?\.com/|
                            tube\.majestyc\.net/|
                            youtube\.googleapis\.com/)                        # the various hostnames, with wildcard subdomains
                         (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
                         (?:                                                  # the various things that can precede the ID:
                             (?:(?:v|embed|e)/)                               # v/ or embed/ or e/
                             |(?:                                             # or the v= param in all its forms
                                 (?:(?:watch|movie)(?:_popup)?(?:\.php)?)?    # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
                                 (?:\?|\#!?)                                  # the params delimiter ? or # or #!
                                 (?:.*?&)?                                    # any other preceding param (like /?s=tuff&v=xxxx)
                                 v=
                             )
                         ))
                         |youtu\.be/                                          # just youtu.be/xxxx
                         )
                     )?                                                       # all until now is optional -> you can pass the naked ID
                     ([0-9A-Za-z_-]{11})                                      # here is it! the YouTube video ID
                     (?(1).+)?                                                # if we found the ID, everything can follow
                     $"""
    # Captures the 'next_url' parameter of verify_age redirect URLs.
    _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
    # Listed in order of quality
    _available_formats = ['38', '37', '46', '22', '45', '35', '44', '34', '18', '43', '6', '5', '36', '17', '13',
                          # Apple HTTP Live Streaming
                          '96', '95', '94', '93', '92', '132', '151',
                          # 3D
                          '85', '84', '102', '83', '101', '82', '100',
                          # Dash video
                          '138', '137', '248', '136', '247', '135', '246',
                          '245', '244', '134', '243', '133', '242', '160',
                          # Dash audio
                          '141', '172', '140', '171', '139',
                          ]
    # Same itags, but WebM formats are ranked ahead of MP4/FLV at
    # comparable quality (used when free formats are preferred).
    _available_formats_prefer_free = ['38', '46', '37', '45', '22', '44', '35', '43', '34', '18', '6', '5', '36', '17', '13',
                                      # Apple HTTP Live Streaming
                                      '96', '95', '94', '93', '92', '132', '151',
                                      # 3D
                                      '85', '102', '84', '101', '83', '100', '82',
                                      # Dash video
                                      '138', '248', '137', '247', '136', '246', '245',
                                      '244', '135', '243', '134', '242', '133', '160',
                                      # Dash audio
                                      '172', '141', '171', '140', '139',
                                      ]
    # Container name -> itags available in that container (best first).
    _video_formats_map = {
        'flv': ['35', '34', '6', '5'],
        '3gp': ['36', '17', '13'],
        'mp4': ['38', '37', '22', '18'],
        'webm': ['46', '45', '44', '43'],
    }
    # itag -> file extension of the downloaded media.
    _video_extensions = {
        '13': '3gp',
        '17': '3gp',
        '18': 'mp4',
        '22': 'mp4',
        '36': '3gp',
        '37': 'mp4',
        '38': 'mp4',
        '43': 'webm',
        '44': 'webm',
        '45': 'webm',
        '46': 'webm',

        # 3d videos
        '82': 'mp4',
        '83': 'mp4',
        '84': 'mp4',
        '85': 'mp4',
        '100': 'webm',
        '101': 'webm',
        '102': 'webm',

        # Apple HTTP Live Streaming
        '92': 'mp4',
        '93': 'mp4',
        '94': 'mp4',
        '95': 'mp4',
        '96': 'mp4',
        '132': 'mp4',
        '151': 'mp4',

        # Dash mp4
        '133': 'mp4',
        '134': 'mp4',
        '135': 'mp4',
        '136': 'mp4',
        '137': 'mp4',
        '138': 'mp4',
        '160': 'mp4',

        # Dash mp4 audio
        '139': 'm4a',
        '140': 'm4a',
        '141': 'm4a',

        # Dash webm
        '171': 'webm',
        '172': 'webm',
        '242': 'webm',
        '243': 'webm',
        '244': 'webm',
        '245': 'webm',
        '246': 'webm',
        '247': 'webm',
        '248': 'webm',
    }
    # itag -> human-readable quality string: 'HEIGHTxWIDTH' for the classic
    # formats, 'NNNp' vertical resolution for the newer ones, and 'NNNk'
    # (presumably audio bitrate) for the DASH audio itags.
    _video_dimensions = {
        '5': '240x400',
        '6': '???',
        '13': '???',
        '17': '144x176',
        '18': '360x640',
        '22': '720x1280',
        '34': '360x640',
        '35': '480x854',
        '36': '240x320',
        '37': '1080x1920',
        '38': '3072x4096',
        '43': '360x640',
        '44': '480x854',
        '45': '720x1280',
        '46': '1080x1920',
        '82': '360p',
        '83': '480p',
        '84': '720p',
        '85': '1080p',
        '92': '240p',
        '93': '360p',
        '94': '480p',
        '95': '720p',
        '96': '1080p',
        '100': '360p',
        '101': '480p',
        '102': '720p',
        '132': '240p',
        '151': '72p',
        '133': '240p',
        '134': '360p',
        '135': '480p',
        '136': '720p',
        '137': '1080p',
        '138': '>1080p',
        '139': '48k',
        '140': '128k',
        '141': '256k',
        '160': '192p',
        '171': '128k',
        '172': '256k',
        '242': '240p',
        '243': '360p',
        '244': '480p',
        '245': '480p',
        '246': '480p',
        '247': '720p',
        '248': '1080p',
    }
    # itag -> extra format note (3D / DASH) for format listings.
    _special_itags = {
        '82': '3D',
        '83': '3D',
        '84': '3D',
        '85': '3D',
        '100': '3D',
        '101': '3D',
        '102': '3D',
        '133': 'DASH Video',
        '134': 'DASH Video',
        '135': 'DASH Video',
        '136': 'DASH Video',
        '137': 'DASH Video',
        '138': 'DASH Video',
        '139': 'DASH Audio',
        '140': 'DASH Audio',
        '141': 'DASH Audio',
        '160': 'DASH Video',
        '171': 'DASH Audio',
        '172': 'DASH Audio',
        '242': 'DASH Video',
        '243': 'DASH Video',
        '244': 'DASH Video',
        '245': 'DASH Video',
        '246': 'DASH Video',
        '247': 'DASH Video',
        '248': 'DASH Video',
    }
328
    IE_NAME = u'youtube'
    # Self-test fixtures: each entry gives a URL, the expected output file
    # name, and the metadata fields the test runner verifies.
    _TESTS = [
        {
            u"url":  u"http://www.youtube.com/watch?v=BaW_jenozKc",
            u"file":  u"BaW_jenozKc.mp4",
            u"info_dict": {
                u"title": u"youtube-dl test video \"'/\\ä↭𝕐",
                u"uploader": u"Philipp Hagemeister",
                u"uploader_id": u"phihag",
                u"upload_date": u"20121002",
                u"description": u"test chars:  \"'/\\ä↭𝕐\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de ."
            }
        },
        {
            u"url":  u"http://www.youtube.com/watch?v=UxxajLWwzqY",
            u"file":  u"UxxajLWwzqY.mp4",
            u"note": u"Test generic use_cipher_signature video (#897)",
            u"info_dict": {
                u"upload_date": u"20120506",
                u"title": u"Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]",
                u"description": u"md5:5b292926389560516e384ac437c0ec07",
                u"uploader": u"Icona Pop",
                u"uploader_id": u"IconaPop"
            }
        },
        {
            u"url":  u"https://www.youtube.com/watch?v=07FYdnEawAQ",
            u"file":  u"07FYdnEawAQ.mp4",
            u"note": u"Test VEVO video with age protection (#956)",
            u"info_dict": {
                u"upload_date": u"20130703",
                u"title": u"Justin Timberlake - Tunnel Vision (Explicit)",
                u"description": u"md5:64249768eec3bc4276236606ea996373",
                u"uploader": u"justintimberlakeVEVO",
                u"uploader_id": u"justintimberlakeVEVO"
            }
        },
    ]
367
368
369     @classmethod
370     def suitable(cls, url):
371         """Receives a URL and returns True if suitable for this IE."""
372         if YoutubePlaylistIE.suitable(url): return False
373         return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
374
    def __init__(self, *args, **kwargs):
        super(YoutubeIE, self).__init__(*args, **kwargs)
        # Per-instance cache for player data; initialized empty here and
        # populated elsewhere in this class.
        self._player_cache = {}
378
    def report_video_webpage_download(self, video_id):
        """Report attempt to download video webpage."""
        self.to_screen(u'%s: Downloading video webpage' % video_id)

    def report_video_info_webpage_download(self, video_id):
        """Report attempt to download video info webpage."""
        self.to_screen(u'%s: Downloading video info webpage' % video_id)

    def report_information_extraction(self, video_id):
        """Report attempt to extract video information."""
        self.to_screen(u'%s: Extracting video information' % video_id)

    def report_unavailable_format(self, video_id, format):
        """Report that the requested format is not available."""
        self.to_screen(u'%s: Format %s not available' % (video_id, format))

    def report_rtmp_download(self):
        """Indicate the download will use the RTMP protocol."""
        self.to_screen(u'RTMP download detected')
398
    def _extract_signature_function(self, video_id, player_url, slen):
        """Obtain the signature-deciphering function for a player.

        Derives the player type ('js' or 'swf') and id from *player_url*,
        tries a filesystem cache first, otherwise downloads and parses the
        player.  Returns a callable mapping an encrypted signature string
        of length *slen* to its deciphered form.
        """
        id_m = re.match(r'.*-(?P<id>[a-zA-Z0-9_-]+)\.(?P<ext>[a-z]+)$',
                        player_url)
        player_type = id_m.group('ext')
        player_id = id_m.group('id')

        # Read from filesystem cache
        func_id = '%s_%s_%d' % (player_type, player_id, slen)
        # func_id is used as a file name below, so it must not contain
        # path separators.
        assert os.path.basename(func_id) == func_id
        cache_dir = get_cachedir(self._downloader.params)

        cache_enabled = cache_dir is not None
        if cache_enabled:
            cache_fn = os.path.join(os.path.expanduser(cache_dir),
                                    u'youtube-sigfuncs',
                                    func_id + '.json')
            try:
                with io.open(cache_fn, 'r', encoding='utf-8') as cachef:
                    # Cached spec is the permutation as a JSON list of indices.
                    cache_spec = json.load(cachef)
                return lambda s: u''.join(s[i] for i in cache_spec)
            except IOError:
                pass  # No cache available

        if player_type == 'js':
            code = self._download_webpage(
                player_url, video_id,
                note=u'Downloading %s player %s' % (player_type, player_id),
                errnote=u'Download of %s failed' % player_url)
            res = self._parse_sig_js(code)
        elif player_type == 'swf':
            urlh = self._request_webpage(
                player_url, video_id,
                note=u'Downloading %s player %s' % (player_type, player_id),
                errnote=u'Download of %s failed' % player_url)
            code = urlh.read()
            res = self._parse_sig_swf(code)
        else:
            assert False, 'Invalid player type %r' % player_type

        if cache_enabled:
            try:
                # Recover the permutation by running the function on a probe
                # string of distinct characters, then persist it as JSON.
                test_string = u''.join(map(compat_chr, range(slen)))
                cache_res = res(test_string)
                cache_spec = [ord(c) for c in cache_res]
                try:
                    os.makedirs(os.path.dirname(cache_fn))
                except OSError as ose:
                    if ose.errno != errno.EEXIST:
                        raise
                write_json_file(cache_spec, cache_fn)
            except Exception:
                # Caching is best-effort: warn but never fail the extraction.
                tb = traceback.format_exc()
                self._downloader.report_warning(
                    u'Writing cache to %r failed: %s' % (cache_fn, tb))

        return res
455
456     def _print_sig_code(self, func, slen):
457         def gen_sig_code(idxs):
458             def _genslice(start, end, step):
459                 starts = u'' if start == 0 else str(start)
460                 ends = (u':%d' % (end+step)) if end + step >= 0 else u':'
461                 steps = u'' if step == 1 else (u':%d' % step)
462                 return u's[%s%s%s]' % (starts, ends, steps)
463
464             step = None
465             start = '(Never used)'  # Quelch pyflakes warnings - start will be
466                                     # set as soon as step is set
467             for i, prev in zip(idxs[1:], idxs[:-1]):
468                 if step is not None:
469                     if i - prev == step:
470                         continue
471                     yield _genslice(start, prev, step)
472                     step = None
473                     continue
474                 if i - prev in [-1, 1]:
475                     step = i - prev
476                     start = prev
477                     continue
478                 else:
479                     yield u's[%d]' % prev
480             if step is None:
481                 yield u's[%d]' % i
482             else:
483                 yield _genslice(start, i, step)
484
485         test_string = u''.join(map(compat_chr, range(slen)))
486         cache_res = func(test_string)
487         cache_spec = [ord(c) for c in cache_res]
488         expr_code = u' + '.join(gen_sig_code(cache_spec))
489         code = u'if len(s) == %d:\n    return %s\n' % (slen, expr_code)
490         self.to_screen(u'Extracted signature function:\n' + code)
491
492     def _parse_sig_js(self, jscode):
493         funcname = self._search_regex(
494             r'signature=([a-zA-Z]+)', jscode,
495             u'Initial JS player signature function name')
496
497         functions = {}
498
499         def argidx(varname):
500             return string.lowercase.index(varname)
501
502         def interpret_statement(stmt, local_vars, allow_recursion=20):
503             if allow_recursion < 0:
504                 raise ExtractorError(u'Recursion limit reached')
505
506             if stmt.startswith(u'var '):
507                 stmt = stmt[len(u'var '):]
508             ass_m = re.match(r'^(?P<out>[a-z]+)(?:\[(?P<index>[^\]]+)\])?' +
509                              r'=(?P<expr>.*)$', stmt)
510             if ass_m:
511                 if ass_m.groupdict().get('index'):
512                     def assign(val):
513                         lvar = local_vars[ass_m.group('out')]
514                         idx = interpret_expression(ass_m.group('index'),
515                                                    local_vars, allow_recursion)
516                         assert isinstance(idx, int)
517                         lvar[idx] = val
518                         return val
519                     expr = ass_m.group('expr')
520                 else:
521                     def assign(val):
522                         local_vars[ass_m.group('out')] = val
523                         return val
524                     expr = ass_m.group('expr')
525             elif stmt.startswith(u'return '):
526                 assign = lambda v: v
527                 expr = stmt[len(u'return '):]
528             else:
529                 raise ExtractorError(
530                     u'Cannot determine left side of statement in %r' % stmt)
531
532             v = interpret_expression(expr, local_vars, allow_recursion)
533             return assign(v)
534
535         def interpret_expression(expr, local_vars, allow_recursion):
536             if expr.isdigit():
537                 return int(expr)
538
539             if expr.isalpha():
540                 return local_vars[expr]
541
542             m = re.match(r'^(?P<in>[a-z]+)\.(?P<member>.*)$', expr)
543             if m:
544                 member = m.group('member')
545                 val = local_vars[m.group('in')]
546                 if member == 'split("")':
547                     return list(val)
548                 if member == 'join("")':
549                     return u''.join(val)
550                 if member == 'length':
551                     return len(val)
552                 if member == 'reverse()':
553                     return val[::-1]
554                 slice_m = re.match(r'slice\((?P<idx>.*)\)', member)
555                 if slice_m:
556                     idx = interpret_expression(
557                         slice_m.group('idx'), local_vars, allow_recursion-1)
558                     return val[idx:]
559
560             m = re.match(
561                 r'^(?P<in>[a-z]+)\[(?P<idx>.+)\]$', expr)
562             if m:
563                 val = local_vars[m.group('in')]
564                 idx = interpret_expression(m.group('idx'), local_vars,
565                                            allow_recursion-1)
566                 return val[idx]
567
568             m = re.match(r'^(?P<a>.+?)(?P<op>[%])(?P<b>.+?)$', expr)
569             if m:
570                 a = interpret_expression(m.group('a'),
571                                          local_vars, allow_recursion)
572                 b = interpret_expression(m.group('b'),
573                                          local_vars, allow_recursion)
574                 return a % b
575
576             m = re.match(
577                 r'^(?P<func>[a-zA-Z]+)\((?P<args>[a-z0-9,]+)\)$', expr)
578             if m:
579                 fname = m.group('func')
580                 if fname not in functions:
581                     functions[fname] = extract_function(fname)
582                 argvals = [int(v) if v.isdigit() else local_vars[v]
583                            for v in m.group('args').split(',')]
584                 return functions[fname](argvals)
585             raise ExtractorError(u'Unsupported JS expression %r' % expr)
586
587         def extract_function(funcname):
588             func_m = re.search(
589                 r'function ' + re.escape(funcname) +
590                 r'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}',
591                 jscode)
592             argnames = func_m.group('args').split(',')
593
594             def resf(args):
595                 local_vars = dict(zip(argnames, args))
596                 for stmt in func_m.group('code').split(';'):
597                     res = interpret_statement(stmt, local_vars)
598                 return res
599             return resf
600
601         initial_function = extract_function(funcname)
602         return lambda s: initial_function([s])
603
604     def _parse_sig_swf(self, file_contents):
605         if file_contents[1:3] != b'WS':
606             raise ExtractorError(
607                 u'Not an SWF file; header is %r' % file_contents[:3])
608         if file_contents[:1] == b'C':
609             content = zlib.decompress(file_contents[8:])
610         else:
611             raise NotImplementedError(u'Unsupported compression format %r' %
612                                       file_contents[:1])
613
614         def extract_tags(content):
615             pos = 0
616             while pos < len(content):
617                 header16 = struct.unpack('<H', content[pos:pos+2])[0]
618                 pos += 2
619                 tag_code = header16 >> 6
620                 tag_len = header16 & 0x3f
621                 if tag_len == 0x3f:
622                     tag_len = struct.unpack('<I', content[pos:pos+4])[0]
623                     pos += 4
624                 assert pos+tag_len <= len(content)
625                 yield (tag_code, content[pos:pos+tag_len])
626                 pos += tag_len
627
628         code_tag = next(tag
629                         for tag_code, tag in extract_tags(content)
630                         if tag_code == 82)
631         p = code_tag.index(b'\0', 4) + 1
632         code_reader = io.BytesIO(code_tag[p:])
633
634         # Parse ABC (AVM2 ByteCode)
635         def read_int(reader=None):
636             if reader is None:
637                 reader = code_reader
638             res = 0
639             shift = 0
640             for _ in range(5):
641                 buf = reader.read(1)
642                 assert len(buf) == 1
643                 b = struct.unpack('<B', buf)[0]
644                 res = res | ((b & 0x7f) << shift)
645                 if b & 0x80 == 0:
646                     break
647                 shift += 7
648             return res
649
650         def u30(reader=None):
651             res = read_int(reader)
652             assert res & 0xf0000000 == 0
653             return res
654         u32 = read_int
655
656         def s32(reader=None):
657             v = read_int(reader)
658             if v & 0x80000000 != 0:
659                 v = - ((v ^ 0xffffffff) + 1)
660             return v
661
662         def read_string(reader=None):
663             if reader is None:
664                 reader = code_reader
665             slen = u30(reader)
666             resb = reader.read(slen)
667             assert len(resb) == slen
668             return resb.decode('utf-8')
669
670         def read_bytes(count, reader=None):
671             if reader is None:
672                 reader = code_reader
673             resb = reader.read(count)
674             assert len(resb) == count
675             return resb
676
677         def read_byte(reader=None):
678             resb = read_bytes(1, reader=reader)
679             res = struct.unpack('<B', resb)[0]
680             return res
681
682         # minor_version + major_version
683         read_bytes(2 + 2)
684
685         # Constant pool
686         int_count = u30()
687         for _c in range(1, int_count):
688             s32()
689         uint_count = u30()
690         for _c in range(1, uint_count):
691             u32()
692         double_count = u30()
693         read_bytes((double_count-1) * 8)
694         string_count = u30()
695         constant_strings = [u'']
696         for _c in range(1, string_count):
697             s = read_string()
698             constant_strings.append(s)
699         namespace_count = u30()
700         for _c in range(1, namespace_count):
701             read_bytes(1)  # kind
702             u30()  # name
703         ns_set_count = u30()
704         for _c in range(1, ns_set_count):
705             count = u30()
706             for _c2 in range(count):
707                 u30()
708         multiname_count = u30()
709         MULTINAME_SIZES = {
710             0x07: 2,  # QName
711             0x0d: 2,  # QNameA
712             0x0f: 1,  # RTQName
713             0x10: 1,  # RTQNameA
714             0x11: 0,  # RTQNameL
715             0x12: 0,  # RTQNameLA
716             0x09: 2,  # Multiname
717             0x0e: 2,  # MultinameA
718             0x1b: 1,  # MultinameL
719             0x1c: 1,  # MultinameLA
720         }
721         multinames = [u'']
722         for _c in range(1, multiname_count):
723             kind = u30()
724             assert kind in MULTINAME_SIZES, u'Invalid multiname kind %r' % kind
725             if kind == 0x07:
726                 u30()  # namespace_idx
727                 name_idx = u30()
728                 multinames.append(constant_strings[name_idx])
729             else:
730                 multinames.append('[MULTINAME kind: %d]' % kind)
731                 for _c2 in range(MULTINAME_SIZES[kind]):
732                     u30()
733
734         # Methods
735         method_count = u30()
736         MethodInfo = collections.namedtuple(
737             'MethodInfo',
738             ['NEED_ARGUMENTS', 'NEED_REST'])
739         method_infos = []
740         for method_id in range(method_count):
741             param_count = u30()
742             u30()  # return type
743             for _ in range(param_count):
744                 u30()  # param type
745             u30()  # name index (always 0 for youtube)
746             flags = read_byte()
747             if flags & 0x08 != 0:
748                 # Options present
749                 option_count = u30()
750                 for c in range(option_count):
751                     u30()  # val
752                     read_bytes(1)  # kind
753             if flags & 0x80 != 0:
754                 # Param names present
755                 for _ in range(param_count):
756                     u30()  # param name
757             mi = MethodInfo(flags & 0x01 != 0, flags & 0x04 != 0)
758             method_infos.append(mi)
759
760         # Metadata
761         metadata_count = u30()
762         for _c in range(metadata_count):
763             u30()  # name
764             item_count = u30()
765             for _c2 in range(item_count):
766                 u30()  # key
767                 u30()  # value
768
        def parse_traits_info():
            """Parse one ABC trait record, advancing the shared reader.

            Returns a dict of the method bindings found in the trait:
            Method/Getter/Setter traits map multiname -> method index;
            Function traits map function index -> multiname.  All other
            trait kinds are consumed from the stream but discarded.
            """
            trait_name_idx = u30()
            kind_full = read_byte()
            # Low nibble is the trait kind, high nibble the attribute flags.
            kind = kind_full & 0x0f
            attrs = kind_full >> 4
            methods = {}
            if kind in [0x00, 0x06]:  # Slot or Const
                u30()  # Slot id
                u30()  # type_name_idx
                vindex = u30()
                if vindex != 0:
                    read_byte()  # vkind
            elif kind in [0x01, 0x02, 0x03]:  # Method / Getter / Setter
                u30()  # disp_id
                method_idx = u30()
                methods[multinames[trait_name_idx]] = method_idx
            elif kind == 0x04:  # Class
                u30()  # slot_id
                u30()  # classi
            elif kind == 0x05:  # Function
                u30()  # slot_id
                function_idx = u30()
                # NOTE(review): key/value are inverted relative to the
                # Method/Getter/Setter branch above (idx -> name instead of
                # name -> idx) — confirm this asymmetry is intentional.
                methods[function_idx] = multinames[trait_name_idx]
            else:
                raise ExtractorError(u'Unsupported trait kind %d' % kind)

            if attrs & 0x4 != 0:  # Metadata present
                metadata_count = u30()
                for _c3 in range(metadata_count):
                    u30()  # metadata index

            return methods
801
802         # Classes
803         TARGET_CLASSNAME = u'SignatureDecipher'
804         searched_idx = multinames.index(TARGET_CLASSNAME)
805         searched_class_id = None
806         class_count = u30()
807         for class_id in range(class_count):
808             name_idx = u30()
809             if name_idx == searched_idx:
810                 # We found the class we're looking for!
811                 searched_class_id = class_id
812             u30()  # super_name idx
813             flags = read_byte()
814             if flags & 0x08 != 0:  # Protected namespace is present
815                 u30()  # protected_ns_idx
816             intrf_count = u30()
817             for _c2 in range(intrf_count):
818                 u30()
819             u30()  # iinit
820             trait_count = u30()
821             for _c2 in range(trait_count):
822                 parse_traits_info()
823
824         if searched_class_id is None:
825             raise ExtractorError(u'Target class %r not found' %
826                                  TARGET_CLASSNAME)
827
828         method_names = {}
829         method_idxs = {}
830         for class_id in range(class_count):
831             u30()  # cinit
832             trait_count = u30()
833             for _c2 in range(trait_count):
834                 trait_methods = parse_traits_info()
835                 if class_id == searched_class_id:
836                     method_names.update(trait_methods.items())
837                     method_idxs.update(dict(
838                         (idx, name)
839                         for name, idx in trait_methods.items()))
840
841         # Scripts
842         script_count = u30()
843         for _c in range(script_count):
844             u30()  # init
845             trait_count = u30()
846             for _c2 in range(trait_count):
847                 parse_traits_info()
848
849         # Method bodies
850         method_body_count = u30()
851         Method = collections.namedtuple('Method', ['code', 'local_count'])
852         methods = {}
853         for _c in range(method_body_count):
854             method_idx = u30()
855             u30()  # max_stack
856             local_count = u30()
857             u30()  # init_scope_depth
858             u30()  # max_scope_depth
859             code_length = u30()
860             code = read_bytes(code_length)
861             if method_idx in method_idxs:
862                 m = Method(code, local_count)
863                 methods[method_idxs[method_idx]] = m
864             exception_count = u30()
865             for _c2 in range(exception_count):
866                 u30()  # from
867                 u30()  # to
868                 u30()  # target
869                 u30()  # exc_type
870                 u30()  # var_name
871             trait_count = u30()
872             for _c2 in range(trait_count):
873                 parse_traits_info()
874
875         assert p + code_reader.tell() == len(code_tag)
876         assert len(methods) == len(method_idxs)
877
878         method_pyfunctions = {}
879
        def extract_function(func_name):
            """Compile the ABC method named func_name into a Python callable.

            Looks up the method body parsed earlier and wraps it in an
            interpreter closure; results are memoized in method_pyfunctions
            so mutually-referencing functions resolve to the same object.
            Raises ExtractorError when no method body was recorded for the
            name.
            """
            if func_name in method_pyfunctions:
                return method_pyfunctions[func_name]
            if func_name not in methods:
                raise ExtractorError(u'Cannot find function %r' % func_name)
            m = methods[func_name]

            def resfunc(args):
                """Interpret the AVM2 bytecode of m with args as locals.

                Implements only the small opcode subset YouTube's cipher
                code uses (stack ops, string/list helpers, local register
                access); anything else raises NotImplementedError.
                """
                # Register 0 is the AVM2 'this'; we never call methods on it,
                # so a placeholder string is sufficient.
                registers = ['(this)'] + list(args) + [None] * m.local_count
                stack = []
                coder = io.BytesIO(m.code)
                while True:
                    opcode = struct.unpack('!B', coder.read(1))[0]
                    if opcode == 36:  # pushbyte
                        v = struct.unpack('!B', coder.read(1))[0]
                        stack.append(v)
                    elif opcode == 44:  # pushstring
                        idx = u30(coder)
                        stack.append(constant_strings[idx])
                    elif opcode == 48:  # pushscope
                        # We don't implement the scope register, so we'll just
                        # ignore the popped value
                        stack.pop()
                    elif opcode == 70:  # callproperty
                        index = u30(coder)
                        mname = multinames[index]
                        arg_count = u30(coder)
                        # Arguments were pushed left-to-right, so popping
                        # yields them reversed; note this rebinds the outer
                        # 'args' parameter deliberately.
                        args = list(reversed(
                            [stack.pop() for _ in range(arg_count)]))
                        obj = stack.pop()
                        if mname == u'split':
                            assert len(args) == 1
                            assert isinstance(args[0], compat_str)
                            assert isinstance(obj, compat_str)
                            if args[0] == u'':
                                res = list(obj)
                            else:
                                res = obj.split(args[0])
                            stack.append(res)
                        elif mname == u'slice':
                            assert len(args) == 1
                            assert isinstance(args[0], int)
                            assert isinstance(obj, list)
                            res = obj[args[0]:]
                            stack.append(res)
                        elif mname == u'join':
                            assert len(args) == 1
                            assert isinstance(args[0], compat_str)
                            assert isinstance(obj, list)
                            res = args[0].join(obj)
                            stack.append(res)
                        elif mname in method_pyfunctions:
                            stack.append(method_pyfunctions[mname](args))
                        else:
                            raise NotImplementedError(
                                u'Unsupported property %r on %r'
                                % (mname, obj))
                    elif opcode == 72:  # returnvalue
                        res = stack.pop()
                        return res
                    elif opcode == 79:  # callpropvoid
                        index = u30(coder)
                        mname = multinames[index]
                        arg_count = u30(coder)
                        args = list(reversed(
                            [stack.pop() for _ in range(arg_count)]))
                        obj = stack.pop()
                        if mname == u'reverse':
                            # In-place list reversal; no result is pushed.
                            assert isinstance(obj, list)
                            obj.reverse()
                        else:
                            raise NotImplementedError(
                                u'Unsupported (void) property %r on %r'
                                % (mname, obj))
                    elif opcode == 93:  # findpropstrict
                        index = u30(coder)
                        mname = multinames[index]
                        # Recursively compile the referenced function so a
                        # later callproperty can invoke it.
                        res = extract_function(mname)
                        stack.append(res)
                    elif opcode == 97:  # setproperty
                        index = u30(coder)
                        value = stack.pop()
                        idx = stack.pop()
                        obj = stack.pop()
                        assert isinstance(obj, list)
                        assert isinstance(idx, int)
                        obj[idx] = value
                    elif opcode == 98:  # getlocal
                        index = u30(coder)
                        stack.append(registers[index])
                    elif opcode == 99:  # setlocal
                        index = u30(coder)
                        value = stack.pop()
                        registers[index] = value
                    elif opcode == 102:  # getproperty
                        index = u30(coder)
                        pname = multinames[index]
                        if pname == u'length':
                            obj = stack.pop()
                            assert isinstance(obj, list)
                            stack.append(len(obj))
                        else:  # Assume attribute access
                            idx = stack.pop()
                            assert isinstance(idx, int)
                            obj = stack.pop()
                            assert isinstance(obj, list)
                            stack.append(obj[idx])
                    elif opcode == 128:  # coerce
                        u30(coder)
                    elif opcode == 133:  # coerce_s
                        assert isinstance(stack[-1], (type(None), compat_str))
                    elif opcode == 164:  # modulo
                        value2 = stack.pop()
                        value1 = stack.pop()
                        res = value1 % value2
                        stack.append(res)
                    elif opcode == 208:  # getlocal_0
                        stack.append(registers[0])
                    elif opcode == 209:  # getlocal_1
                        stack.append(registers[1])
                    elif opcode == 210:  # getlocal_2
                        stack.append(registers[2])
                    elif opcode == 211:  # getlocal_3
                        stack.append(registers[3])
                    elif opcode == 214:  # setlocal_2
                        registers[2] = stack.pop()
                    elif opcode == 215:  # setlocal_3
                        registers[3] = stack.pop()
                    else:
                        raise NotImplementedError(
                            u'Unsupported opcode %d' % opcode)

            method_pyfunctions[func_name] = resfunc
            return resfunc
1014
1015         initial_function = extract_function(u'decipher')
1016         return lambda s: initial_function([s])
1017
1018     def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
1019         """Turn the encrypted s field into a working signature"""
1020
1021         if player_url is not None:
1022             if player_url.startswith(u'//'):
1023                 player_url = u'https:' + player_url
1024             try:
1025                 player_id = (player_url, len(s))
1026                 if player_id not in self._player_cache:
1027                     func = self._extract_signature_function(
1028                         video_id, player_url, len(s)
1029                     )
1030                     self._player_cache[player_id] = func
1031                 func = self._player_cache[player_id]
1032                 if self._downloader.params.get('youtube_print_sig_code'):
1033                     self._print_sig_code(func, len(s))
1034                 return func(s)
1035             except Exception:
1036                 tb = traceback.format_exc()
1037                 self._downloader.report_warning(
1038                     u'Automatic signature extraction failed: ' + tb)
1039
1040             self._downloader.report_warning(
1041                 u'Warning: Falling back to static signature algorithm')
1042
1043         return self._static_decrypt_signature(
1044             s, video_id, player_url, age_gate)
1045
1046     def _static_decrypt_signature(self, s, video_id, player_url, age_gate):
1047         if age_gate:
1048             # The videos with age protection use another player, so the
1049             # algorithms can be different.
1050             if len(s) == 86:
1051                 return s[2:63] + s[82] + s[64:82] + s[63]
1052
1053         if len(s) == 93:
1054             return s[86:29:-1] + s[88] + s[28:5:-1]
1055         elif len(s) == 92:
1056             return s[25] + s[3:25] + s[0] + s[26:42] + s[79] + s[43:79] + s[91] + s[80:83]
1057         elif len(s) == 91:
1058             return s[84:27:-1] + s[86] + s[26:5:-1]
1059         elif len(s) == 90:
1060             return s[25] + s[3:25] + s[2] + s[26:40] + s[77] + s[41:77] + s[89] + s[78:81]
1061         elif len(s) == 89:
1062             return s[84:78:-1] + s[87] + s[77:60:-1] + s[0] + s[59:3:-1]
1063         elif len(s) == 88:
1064             return s[7:28] + s[87] + s[29:45] + s[55] + s[46:55] + s[2] + s[56:87] + s[28]
1065         elif len(s) == 87:
1066             return s[6:27] + s[4] + s[28:39] + s[27] + s[40:59] + s[2] + s[60:]
1067         elif len(s) == 86:
1068             return s[80:72:-1] + s[16] + s[71:39:-1] + s[72] + s[38:16:-1] + s[82] + s[15::-1]
1069         elif len(s) == 85:
1070             return s[3:11] + s[0] + s[12:55] + s[84] + s[56:84]
1071         elif len(s) == 84:
1072             return s[78:70:-1] + s[14] + s[69:37:-1] + s[70] + s[36:14:-1] + s[80] + s[:14][::-1]
1073         elif len(s) == 83:
1074             return s[80:63:-1] + s[0] + s[62:0:-1] + s[63]
1075         elif len(s) == 82:
1076             return s[80:37:-1] + s[7] + s[36:7:-1] + s[0] + s[6:0:-1] + s[37]
1077         elif len(s) == 81:
1078             return s[56] + s[79:56:-1] + s[41] + s[55:41:-1] + s[80] + s[40:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
1079         elif len(s) == 80:
1080             return s[1:19] + s[0] + s[20:68] + s[19] + s[69:80]
1081         elif len(s) == 79:
1082             return s[54] + s[77:54:-1] + s[39] + s[53:39:-1] + s[78] + s[38:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
1083
1084         else:
1085             raise ExtractorError(u'Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s)))
1086
1087     def _get_available_subtitles(self, video_id, webpage):
1088         try:
1089             sub_list = self._download_webpage(
1090                 'http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
1091                 video_id, note=False)
1092         except ExtractorError as err:
1093             self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err))
1094             return {}
1095         lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list)
1096
1097         sub_lang_list = {}
1098         for l in lang_list:
1099             lang = l[1]
1100             params = compat_urllib_parse.urlencode({
1101                 'lang': lang,
1102                 'v': video_id,
1103                 'fmt': self._downloader.params.get('subtitlesformat', 'srt'),
1104                 'name': l[0].encode('utf-8'),
1105             })
1106             url = u'http://www.youtube.com/api/timedtext?' + params
1107             sub_lang_list[lang] = url
1108         if not sub_lang_list:
1109             self._downloader.report_warning(u'video doesn\'t have subtitles')
1110             return {}
1111         return sub_lang_list
1112
    def _get_available_automatic_caption(self, video_id, webpage):
        """Return a dict mapping language code -> automatic-caption URL.

        We need the webpage for getting the captions url, pass it as an
        argument to speed up the process.  Returns an empty dict (after a
        warning) whenever the caption data cannot be located.
        """
        sub_format = self._downloader.params.get('subtitlesformat', 'srt')
        self.to_screen(u'%s: Looking for automatic captions' % video_id)
        # The caption base URL lives inside the inline player configuration.
        mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
        err_msg = u'Couldn\'t find automatic captions for %s' % video_id
        if mobj is None:
            self._downloader.report_warning(err_msg)
            return {}
        player_config = json.loads(mobj.group(1))
        try:
            args = player_config[u'args']
            caption_url = args[u'ttsurl']
            timestamp = args[u'timestamp']
            # We get the available subtitles
            list_params = compat_urllib_parse.urlencode({
                'type': 'list',
                'tlangs': 1,
                'asrs': 1,
            })
            list_url = caption_url + '&' + list_params
            list_page = self._download_webpage(list_url, video_id)
            caption_list = xml.etree.ElementTree.fromstring(list_page.encode('utf-8'))
            # Only a 'track' node of kind 'asr' indicates speech-recognition
            # (automatic) captions; anything else means none are available.
            original_lang_node = caption_list.find('track')
            if original_lang_node is None or original_lang_node.attrib.get('kind') != 'asr' :
                self._downloader.report_warning(u'Video doesn\'t have automatic captions')
                return {}
            original_lang = original_lang_node.attrib['lang_code']

            # Each 'target' node is a language the original captions can be
            # machine-translated into.
            sub_lang_list = {}
            for lang_node in caption_list.findall('target'):
                sub_lang = lang_node.attrib['lang_code']
                params = compat_urllib_parse.urlencode({
                    'lang': original_lang,
                    'tlang': sub_lang,
                    'fmt': sub_format,
                    'ts': timestamp,
                    'kind': 'asr',
                })
                sub_lang_list[sub_lang] = caption_url + '&' + params
            return sub_lang_list
        # An ExtractorError can be raised by the download process if there are
        # no automatic captions but there are subtitles; KeyError covers a
        # player config without 'args'/'ttsurl'/'timestamp'.
        except (KeyError, ExtractorError):
            self._downloader.report_warning(err_msg)
            return {}
1160
1161     def _print_formats(self, formats):
1162         print('Available formats:')
1163         for x in formats:
1164             print('%s\t:\t%s\t[%s]%s' %(x, self._video_extensions.get(x, 'flv'),
1165                                         self._video_dimensions.get(x, '???'),
1166                                         ' ('+self._special_itags[x]+')' if x in self._special_itags else ''))
1167
1168     def _extract_id(self, url):
1169         mobj = re.match(self._VALID_URL, url, re.VERBOSE)
1170         if mobj is None:
1171             raise ExtractorError(u'Invalid URL: %s' % url)
1172         video_id = mobj.group(2)
1173         return video_id
1174
    def _get_video_url_list(self, url_map):
        """
        Transform a dictionary in the format {itag:url} to a list of (itag, url)
        with the requested formats.

        Honors the 'format', 'format_limit', 'prefer_free_formats' and
        'listformats' downloader params.  Returns None (instead of a list)
        when 'listformats' is set, after printing the formats; raises
        ExtractorError when nothing matches.
        """
        req_format = self._downloader.params.get('format', None)
        format_limit = self._downloader.params.get('format_limit', None)
        available_formats = self._available_formats_prefer_free if self._downloader.params.get('prefer_free_formats', False) else self._available_formats
        # format_limit caps quality: keep only formats at or below it in the
        # preference-ordered list.
        if format_limit is not None and format_limit in available_formats:
            format_list = available_formats[available_formats.index(format_limit):]
        else:
            format_list = available_formats
        existing_formats = [x for x in format_list if x in url_map]
        if len(existing_formats) == 0:
            raise ExtractorError(u'no known formats available for video')
        if self._downloader.params.get('listformats', None):
            self._print_formats(existing_formats)
            return
        if req_format is None or req_format == 'best':
            video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
        elif req_format == 'worst':
            video_url_list = [(existing_formats[-1], url_map[existing_formats[-1]])] # worst quality
        elif req_format in ('-1', 'all'):
            video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
        else:
            # Specific formats. We pick the first in a slash-delimited sequence.
            # Format can be specified as itag or 'mp4' or 'flv' etc. We pick the highest quality
            # available in the specified format. For example,
            # if '1/2/3/4' is requested and '2' and '4' are available, we pick '2'.
            # if '1/mp4/3/4' is requested and '1' and '5' (is a mp4) are available, we pick '1'.
            # if '1/mp4/3/4' is requested and '4' and '5' (is a mp4) are available, we pick '5'.
            req_formats = req_format.split('/')
            video_url_list = None
            for rf in req_formats:
                if rf in url_map:
                    video_url_list = [(rf, url_map[rf])]
                    break
                # rf may be a container name ('mp4', 'flv', ...): try its
                # member itags in quality order.
                if rf in self._video_formats_map:
                    for srf in self._video_formats_map[rf]:
                        if srf in url_map:
                            video_url_list = [(srf, url_map[srf])]
                            break
                    else:
                        # Inner loop found nothing (no break): try the next
                        # requested format.
                        continue
                    # Inner loop broke, i.e. a match was found: stop searching.
                    break
            if video_url_list is None:
                raise ExtractorError(u'requested format not available')
        return video_url_list
1223
1224     def _extract_from_m3u8(self, manifest_url, video_id):
1225         url_map = {}
1226         def _get_urls(_manifest):
1227             lines = _manifest.split('\n')
1228             urls = filter(lambda l: l and not l.startswith('#'),
1229                             lines)
1230             return urls
1231         manifest = self._download_webpage(manifest_url, video_id, u'Downloading formats manifest')
1232         formats_urls = _get_urls(manifest)
1233         for format_url in formats_urls:
1234             itag = self._search_regex(r'itag/(\d+?)/', format_url, 'itag')
1235             url_map[itag] = format_url
1236         return url_map
1237
1238     def _extract_annotations(self, video_id):
1239         url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id
1240         return self._download_webpage(url, video_id, note=u'Searching for annotations.', errnote=u'Unable to download video annotations.')
1241
1242     def _real_extract(self, url):
1243         # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1244         mobj = re.search(self._NEXT_URL_RE, url)
1245         if mobj:
1246             url = 'https://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/')
1247         video_id = self._extract_id(url)
1248
1249         # Get video webpage
1250         self.report_video_webpage_download(video_id)
1251         url = 'https://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
1252         request = compat_urllib_request.Request(url)
1253         try:
1254             video_webpage_bytes = compat_urllib_request.urlopen(request).read()
1255         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1256             raise ExtractorError(u'Unable to download video webpage: %s' % compat_str(err))
1257
1258         video_webpage = video_webpage_bytes.decode('utf-8', 'ignore')
1259
1260         # Attempt to extract SWF player URL
1261         mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
1262         if mobj is not None:
1263             player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1264         else:
1265             player_url = None
1266
1267         # Get video info
1268         self.report_video_info_webpage_download(video_id)
1269         if re.search(r'player-age-gate-content">', video_webpage) is not None:
1270             self.report_age_confirmation()
1271             age_gate = True
1272             # We simulate the access to the video from www.youtube.com/v/{video_id}
1273             # this can be viewed without login into Youtube
1274             data = compat_urllib_parse.urlencode({'video_id': video_id,
1275                                                   'el': 'embedded',
1276                                                   'gl': 'US',
1277                                                   'hl': 'en',
1278                                                   'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1279                                                   'asv': 3,
1280                                                   'sts':'1588',
1281                                                   })
1282             video_info_url = 'https://www.youtube.com/get_video_info?' + data
1283             video_info_webpage = self._download_webpage(video_info_url, video_id,
1284                                     note=False,
1285                                     errnote='unable to download video info webpage')
1286             video_info = compat_parse_qs(video_info_webpage)
1287         else:
1288             age_gate = False
1289             for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
1290                 video_info_url = ('https://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
1291                         % (video_id, el_type))
1292                 video_info_webpage = self._download_webpage(video_info_url, video_id,
1293                                         note=False,
1294                                         errnote='unable to download video info webpage')
1295                 video_info = compat_parse_qs(video_info_webpage)
1296                 if 'token' in video_info:
1297                     break
1298         if 'token' not in video_info:
1299             if 'reason' in video_info:
1300                 raise ExtractorError(u'YouTube said: %s' % video_info['reason'][0], expected=True)
1301             else:
1302                 raise ExtractorError(u'"token" parameter not in video info for unknown reason')
1303
1304         if 'view_count' in video_info:
1305             view_count = int(video_info['view_count'][0])
1306         else:
1307             view_count = None
1308
1309         # Check for "rental" videos
1310         if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
1311             raise ExtractorError(u'"rental" videos not supported')
1312
1313         # Start extracting information
1314         self.report_information_extraction(video_id)
1315
1316         # uploader
1317         if 'author' not in video_info:
1318             raise ExtractorError(u'Unable to extract uploader name')
1319         video_uploader = compat_urllib_parse.unquote_plus(video_info['author'][0])
1320
1321         # uploader_id
1322         video_uploader_id = None
1323         mobj = re.search(r'<link itemprop="url" href="http://www.youtube.com/(?:user|channel)/([^"]+)">', video_webpage)
1324         if mobj is not None:
1325             video_uploader_id = mobj.group(1)
1326         else:
1327             self._downloader.report_warning(u'unable to extract uploader nickname')
1328
1329         # title
1330         if 'title' in video_info:
1331             video_title = compat_urllib_parse.unquote_plus(video_info['title'][0])
1332         else:
1333             self._downloader.report_warning(u'Unable to extract video title')
1334             video_title = u'_'
1335
1336         # thumbnail image
1337         # We try first to get a high quality image:
1338         m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
1339                             video_webpage, re.DOTALL)
1340         if m_thumb is not None:
1341             video_thumbnail = m_thumb.group(1)
1342         elif 'thumbnail_url' not in video_info:
1343             self._downloader.report_warning(u'unable to extract video thumbnail')
1344             video_thumbnail = None
1345         else:   # don't panic if we can't find it
1346             video_thumbnail = compat_urllib_parse.unquote_plus(video_info['thumbnail_url'][0])
1347
1348         # upload date
1349         upload_date = None
1350         mobj = re.search(r'id="eow-date.*?>(.*?)</span>', video_webpage, re.DOTALL)
1351         if mobj is not None:
1352             upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
1353             upload_date = unified_strdate(upload_date)
1354
1355         # description
1356         video_description = get_element_by_id("eow-description", video_webpage)
1357         if video_description:
1358             video_description = clean_html(video_description)
1359         else:
1360             fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
1361             if fd_mobj:
1362                 video_description = unescapeHTML(fd_mobj.group(1))
1363             else:
1364                 video_description = u''
1365
1366         # subtitles
1367         video_subtitles = self.extract_subtitles(video_id, video_webpage)
1368
1369         if self._downloader.params.get('listsubtitles', False):
1370             self._list_available_subtitles(video_id, video_webpage)
1371             return
1372
1373         if 'length_seconds' not in video_info:
1374             self._downloader.report_warning(u'unable to extract video duration')
1375             video_duration = ''
1376         else:
1377             video_duration = compat_urllib_parse.unquote_plus(video_info['length_seconds'][0])
1378
1379         # annotations
1380         video_annotations = None
1381         if self._downloader.params.get('writeannotations', False):
1382                 video_annotations = self._extract_annotations(video_id)
1383
1384         # Decide which formats to download
1385
1386         try:
1387             mobj = re.search(r';ytplayer.config = ({.*?});', video_webpage)
1388             if not mobj:
1389                 raise ValueError('Could not find vevo ID')
1390             info = json.loads(mobj.group(1))
1391             args = info['args']
1392             # Easy way to know if the 's' value is in url_encoded_fmt_stream_map
1393             # this signatures are encrypted
1394             if 'url_encoded_fmt_stream_map' not in args:
1395                 raise ValueError(u'No stream_map present')  # caught below
1396             re_signature = re.compile(r'[&,]s=')
1397             m_s = re_signature.search(args['url_encoded_fmt_stream_map'])
1398             if m_s is not None:
1399                 self.to_screen(u'%s: Encrypted signatures detected.' % video_id)
1400                 video_info['url_encoded_fmt_stream_map'] = [args['url_encoded_fmt_stream_map']]
1401             m_s = re_signature.search(args.get('adaptive_fmts', u''))
1402             if m_s is not None:
1403                 if 'adaptive_fmts' in video_info:
1404                     video_info['adaptive_fmts'][0] += ',' + args['adaptive_fmts']
1405                 else:
1406                     video_info['adaptive_fmts'] = [args['adaptive_fmts']]
1407         except ValueError:
1408             pass
1409
1410         if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1411             self.report_rtmp_download()
1412             video_url_list = [(None, video_info['conn'][0])]
1413         elif len(video_info.get('url_encoded_fmt_stream_map', [])) >= 1 or len(video_info.get('adaptive_fmts', [])) >= 1:
1414             encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts',[''])[0]
1415             if 'rtmpe%3Dyes' in encoded_url_map:
1416                 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
1417             url_map = {}
1418             for url_data_str in encoded_url_map.split(','):
1419                 url_data = compat_parse_qs(url_data_str)
1420                 if 'itag' in url_data and 'url' in url_data:
1421                     url = url_data['url'][0]
1422                     if 'sig' in url_data:
1423                         url += '&signature=' + url_data['sig'][0]
1424                     elif 's' in url_data:
1425                         encrypted_sig = url_data['s'][0]
1426                         if self._downloader.params.get('verbose'):
1427                             if age_gate:
1428                                 if player_url is None:
1429                                     player_version = 'unknown'
1430                                 else:
1431                                     player_version = self._search_regex(
1432                                         r'-(.+)\.swf$', player_url,
1433                                         u'flash player', fatal=False)
1434                                 player_desc = 'flash player %s' % player_version
1435                             else:
1436                                 player_version = self._search_regex(
1437                                     r'html5player-(.+?)\.js', video_webpage,
1438                                     'html5 player', fatal=False)
1439                                 player_desc = u'html5 player %s' % player_version
1440
1441                             parts_sizes = u'.'.join(compat_str(len(part)) for part in encrypted_sig.split('.'))
1442                             self.to_screen(u'encrypted signature length %d (%s), itag %s, %s' %
1443                                 (len(encrypted_sig), parts_sizes, url_data['itag'][0], player_desc))
1444
1445                         if not age_gate:
1446                             jsplayer_url_json = self._search_regex(
1447                                 r'"assets":.+?"js":\s*("[^"]+")',
1448                                 video_webpage, u'JS player URL')
1449                             player_url = json.loads(jsplayer_url_json)
1450
1451                         signature = self._decrypt_signature(
1452                             encrypted_sig, video_id, player_url, age_gate)
1453                         url += '&signature=' + signature
1454                     if 'ratebypass' not in url:
1455                         url += '&ratebypass=yes'
1456                     url_map[url_data['itag'][0]] = url
1457             video_url_list = self._get_video_url_list(url_map)
1458             if not video_url_list:
1459                 return
1460         elif video_info.get('hlsvp'):
1461             manifest_url = video_info['hlsvp'][0]
1462             url_map = self._extract_from_m3u8(manifest_url, video_id)
1463             video_url_list = self._get_video_url_list(url_map)
1464             if not video_url_list:
1465                 return
1466
1467         else:
1468             raise ExtractorError(u'no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
1469
1470         results = []
1471         for itag, video_real_url in video_url_list:
1472             # Extension
1473             video_extension = self._video_extensions.get(itag, 'flv')
1474
1475             video_format = '{0} - {1}{2}'.format(itag if itag else video_extension,
1476                                               self._video_dimensions.get(itag, '???'),
1477                                               ' ('+self._special_itags[itag]+')' if itag in self._special_itags else '')
1478
1479             results.append({
1480                 'id':       video_id,
1481                 'url':      video_real_url,
1482                 'uploader': video_uploader,
1483                 'uploader_id': video_uploader_id,
1484                 'upload_date':  upload_date,
1485                 'title':    video_title,
1486                 'ext':      video_extension,
1487                 'format':   video_format,
1488                 'format_id': itag,
1489                 'thumbnail':    video_thumbnail,
1490                 'description':  video_description,
1491                 'player_url':   player_url,
1492                 'subtitles':    video_subtitles,
1493                 'duration':     video_duration,
1494                 'age_limit':    18 if age_gate else 0,
1495                 'annotations':  video_annotations,
1496                 'webpage_url': 'https://www.youtube.com/watch?v=%s' % video_id,
1497                 'view_count': view_count,
1498             })
1499         return results
1500
class YoutubePlaylistIE(InfoExtractor):
    """Extract all videos of a YouTube playlist (course/artist/playlist/watch URLs)."""
    IE_DESC = u'YouTube.com playlists'
    _VALID_URL = r"""(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        youtube\.com/
                        (?:
                           (?:course|view_play_list|my_playlists|artist|playlist|watch)
                           \? (?:.*?&)*? (?:p|a|list)=
                        |  p/
                        )
                        ((?:PL|EC|UU|FL)?[0-9A-Za-z-_]{10,})
                        .*
                     |
                        ((?:PL|EC|UU|FL)[0-9A-Za-z-_]{10,})
                     )"""
    _TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/playlists/%s?max-results=%i&start-index=%i&v=2&alt=json&safeSearch=none'
    _MAX_RESULTS = 50
    IE_NAME = u'youtube:playlist'

    @classmethod
    def suitable(cls, url):
        """Receives a URL and returns True if suitable for this IE."""
        # _VALID_URL is written in verbose mode (whitespace + comments), so the
        # default suitable() would mis-match; re.VERBOSE is mandatory here.
        return re.match(cls._VALID_URL, url, re.VERBOSE) is not None

    def _real_extract(self, url):
        # Extract playlist id
        mobj = re.match(self._VALID_URL, url, re.VERBOSE)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        playlist_id = mobj.group(1) or mobj.group(2)

        # Check if it's a video-specific URL
        query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
        if 'v' in query_dict:
            video_id = query_dict['v'][0]
            if self._downloader.params.get('noplaylist'):
                self.to_screen(u'Downloading just video %s because of --no-playlist' % video_id)
                return self.url_result('https://www.youtube.com/watch?v=' + video_id, 'Youtube')
            else:
                # BUGFIX: playlist_id already includes its own prefix
                # (PL/EC/UU/FL per _VALID_URL); the old message hard-coded an
                # extra 'PL', printing ids like "PLPLxxxx".
                self.to_screen(u'Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))

        # Download playlist videos from API
        videos = []

        for page_num in itertools.count(1):
            start_index = self._MAX_RESULTS * (page_num - 1) + 1
            if start_index >= 1000:
                # The GData API refuses start indexes >= 1000.
                self._downloader.report_warning(u'Max number of results reached')
                break
            url = self._TEMPLATE_URL % (playlist_id, self._MAX_RESULTS, start_index)
            page = self._download_webpage(url, playlist_id, u'Downloading page #%s' % page_num)

            try:
                response = json.loads(page)
            except ValueError as err:
                raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))

            if 'feed' not in response:
                raise ExtractorError(u'Got a malformed response from YouTube API')
            playlist_title = response['feed']['title']['$t']
            if 'entry' not in response['feed']:
                # Number of videos is a multiple of self._MAX_RESULTS
                break

            for entry in response['feed']['entry']:
                index = entry['yt$position']['$t']
                if 'media$group' in entry and 'yt$videoid' in entry['media$group']:
                    videos.append((
                        index,
                        'https://www.youtube.com/watch?v=' + entry['media$group']['yt$videoid']['$t']
                    ))

        # Sort by the playlist position reported by the API, then drop the index.
        videos = [v[1] for v in sorted(videos)]

        url_results = [self.url_result(vurl, 'Youtube') for vurl in videos]
        return [self.playlist_result(url_results, playlist_id, playlist_title)]
1578
1579
class YoutubeChannelIE(InfoExtractor):
    """Extract every video of a YouTube channel as a playlist."""
    IE_DESC = u'YouTube.com channels'
    _VALID_URL = r"^(?:https?://)?(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/([0-9A-Za-z_-]+)"
    _MORE_PAGES_INDICATOR = 'yt-uix-load-more'
    _MORE_PAGES_URL = 'http://www.youtube.com/c4_browse_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s'
    IE_NAME = u'youtube:channel'

    def extract_videos_from_page(self, page):
        """Return the video ids linked from *page*, deduplicated, in first-seen order."""
        found = []
        for match in re.finditer(r'href="/watch\?v=([0-9A-Za-z_-]+)&?', page):
            video_id = match.group(1)
            if video_id not in found:
                found.append(video_id)
        return found

    def _real_extract(self, url):
        # Pull the channel id out of the URL.
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        # Fetch the channel's videos page.
        channel_id = mobj.group(1)
        url = 'https://www.youtube.com/channel/%s/videos' % channel_id
        channel_page = self._download_webpage(url, channel_id)
        autogenerated = re.search(r'channel-header-autogenerated-label', channel_page) is not None

        if autogenerated:
            # Autogenerated channels list every video on this single page;
            # their ajax pagination endpoints come back empty.
            video_ids = self.extract_videos_from_page(channel_page)
        else:
            # Walk the json-based channel_ajax pages until the "load more"
            # widget disappears.
            video_ids = []
            for pagenum in itertools.count(1):
                url = self._MORE_PAGES_URL % (pagenum, channel_id)
                page = self._download_webpage(url, channel_id,
                                              u'Downloading page #%s' % pagenum)

                page = json.loads(page)

                video_ids.extend(self.extract_videos_from_page(page['content_html']))

                if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']:
                    break

        self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))

        urls = ['http://www.youtube.com/watch?v=%s' % vid for vid in video_ids]
        url_entries = [self.url_result(eurl, 'Youtube') for eurl in urls]
        return [self.playlist_result(url_entries, channel_id)]
1634
1635
class YoutubeUserIE(InfoExtractor):
    """Extract all uploads of a YouTube user via the GData API."""
    IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)([A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
    _GDATA_PAGE_SIZE = 50
    _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'
    IE_NAME = u'youtube:user'

    @classmethod
    def suitable(cls, url):
        # _VALID_URL is very permissive, so yield to any other youtube
        # extractor in this module that claims the URL first.
        for name, klass in globals().items():
            if name.endswith('IE') and klass is not cls and klass.suitable(url):
                return False
        return super(YoutubeUserIE, cls).suitable(url)

    def _real_extract(self, url):
        # Pull the username out of the URL.
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        username = mobj.group(1)

        # The Data API caps every response at _GDATA_PAGE_SIZE (currently 50)
        # entries, so request successive pages until one comes back short or
        # empty - that means we got all of them.
        video_ids = []

        for pagenum in itertools.count(0):
            start_index = pagenum * self._GDATA_PAGE_SIZE + 1

            gdata_url = self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index)
            page = self._download_webpage(gdata_url, username,
                                          u'Downloading video ids from %d to %d' % (start_index, start_index + self._GDATA_PAGE_SIZE))

            try:
                response = json.loads(page)
            except ValueError as err:
                raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))
            if 'entry' not in response['feed']:
                # Number of videos is a multiple of self._MAX_RESULTS
                break

            # Each entry id ends with ".../<video_id>".
            ids_in_page = [entry['id']['$t'].split('/')[-1]
                           for entry in response['feed']['entry']]
            video_ids.extend(ids_in_page)

            # A page that is not "full" must be the last one - no need to
            # query again.
            if len(ids_in_page) < self._GDATA_PAGE_SIZE:
                break

        urls = ['http://www.youtube.com/watch?v=%s' % video_id for video_id in video_ids]
        url_results = [self.url_result(rurl, 'Youtube') for rurl in urls]
        return [self.playlist_result(url_results, playlist_title=username)]
1700
class YoutubeSearchIE(SearchInfoExtractor):
    """Search-based extractor driven by the "ytsearch" keyword."""
    IE_DESC = u'YouTube.com searches'
    _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc'
    _MAX_RESULTS = 1000
    IE_NAME = u'youtube:search'
    _SEARCH_KEY = 'ytsearch'

    def report_download_page(self, query, pagenum):
        """Report attempt to download search page with given number."""
        self._downloader.to_screen(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""

        video_ids = []
        limit = n
        pagenum = 0

        # The API serves 50 results per page; keep paging while we still
        # want more and (per totalItems) more exist.
        while 50 * pagenum < limit:
            self.report_download_page(query, pagenum + 1)
            result_url = self._API_URL % (compat_urllib_parse.quote_plus(query), 50 * pagenum + 1)
            request = compat_urllib_request.Request(result_url)
            try:
                data = compat_urllib_request.urlopen(request).read().decode('utf-8')
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                raise ExtractorError(u'Unable to download API page: %s' % compat_str(err))
            api_response = json.loads(data)['data']

            if 'items' not in api_response:
                raise ExtractorError(u'[youtube] No video results')

            video_ids.extend(video['id'] for video in api_response['items'])

            # Clamp the target to what the service says actually exists.
            limit = min(n, api_response['totalItems'])
            pagenum += 1

        # The last page may overshoot; trim to exactly n results.
        del video_ids[n:]
        videos = [self.url_result('http://www.youtube.com/watch?v=%s' % video_id, 'Youtube')
                  for video_id in video_ids]
        return self.playlist_result(videos, query)
1742
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Same search as YoutubeSearchIE, but the API orders results by publish date."""
    _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc&orderby=published'
    _SEARCH_KEY = 'ytsearchdate'
    IE_DESC = u'YouTube.com searches, newest videos first'
1747
class YoutubeShowIE(InfoExtractor):
    """Extract a multi-season show: each season is published as its own playlist."""
    IE_DESC = u'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://www\.youtube\.com/show/(.*)'
    IE_NAME = u'youtube:show'

    def _real_extract(self, url):
        show_name = re.match(self._VALID_URL, url).group(1)
        webpage = self._download_webpage(url, show_name, u'Downloading show webpage')
        # One playlist link per season on the show page.
        seasons = list(re.finditer(r'href="(/playlist\?list=.*?)"', webpage))
        self.to_screen(u'%s: Found %s seasons' % (show_name, len(seasons)))
        return [self.url_result('https://www.youtube.com' + match.group(1), 'YoutubePlaylist')
                for match in seasons]
1761
1762
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Base class for extractors that fetch info from
    http://www.youtube.com/feed_ajax
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
    """
    _LOGIN_REQUIRED = True
    _PAGING_STEP = 30
    # use action_load_personal_feed instead of action_load_system_feed
    _PERSONAL_FEED = False

    @property
    def _FEED_TEMPLATE(self):
        # Select the ajax action according to the feed type; the trailing
        # %%s survives as a %s placeholder for the paging offset.
        action = ('action_load_personal_feed' if self._PERSONAL_FEED
                  else 'action_load_system_feed')
        return 'http://www.youtube.com/feed_ajax?%s=1&feed_name=%s&paging=%%s' % (action, self._FEED_NAME)

    @property
    def IE_NAME(self):
        return u'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        self._login()

    def _real_extract(self, url):
        feed_entries = []
        # The step argument of range() is only usable from 2.7 on, hence
        # itertools.count plus an explicit multiplication.
        for page_idx in itertools.count(0):
            info = self._download_webpage(self._FEED_TEMPLATE % (page_idx * self._PAGING_STEP),
                                          u'%s feed' % self._FEED_NAME,
                                          u'Downloading page %s' % page_idx)
            info = json.loads(info)
            matches = re.finditer(r'"/watch\?v=(.*?)["&]', info['feed_html'])
            for video_id in orderedSet(match.group(1) for match in matches):
                feed_entries.append(self.url_result(video_id, 'Youtube'))
            # A null 'paging' field marks the last page.
            if info['paging'] is None:
                break
        return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE)
1804
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the logged-in user's subscriptions."""
    IE_DESC = u'YouTube.com subscriptions feed, "ytsubs" keyword(requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = u'Youtube Subscriptions'
1810
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the logged-in user's recommended videos."""
    IE_DESC = u'YouTube.com recommended videos, "ytrec" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?'
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = u'Youtube Recommended videos'
1816
class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the logged-in user's "watch later" list."""
    IE_DESC = u'Youtube watch later list, "ytwatchlater" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/watch_later|:ytwatchlater'
    _FEED_NAME = 'watch_later'
    _PLAYLIST_TITLE = u'Youtube Watch Later'
    # Larger page size than the default feed; this feed is per-user, so the
    # base class must issue action_load_personal_feed.
    _PAGING_STEP = 100
    _PERSONAL_FEED = True
1824
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Extract the logged-in user's favourite videos via their backing playlist."""
    IE_NAME = u'youtube:favorites'
    IE_DESC = u'YouTube.com favourite videos, "ytfav" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
    _LOGIN_REQUIRED = True

    def _real_extract(self, url):
        # The favourites page embeds the id of the playlist that backs it;
        # delegate the real extraction to YoutubePlaylistIE.
        page = self._download_webpage('https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        list_id = self._search_regex(r'list=(.+?)["&]', page, u'favourites playlist id')
        return self.url_result(list_id, 'YoutubePlaylist')
1835
1836
class YoutubeTruncatedURLIE(InfoExtractor):
    # Matches watch URLs that lost their v= parameter because an unquoted '&'
    # was swallowed by the shell, leaving only the feature= argument.
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'(?:https?://)?[^/]+/watch\?feature=[a-z_]+$'

    def _real_extract(self, url):
        # Nothing can be extracted - the video id is gone.  Fail with a
        # message explaining how to quote the URL properly.
        raise ExtractorError(
            u'Did you forget to quote the URL? Remember that & is a meta '
            u'character in most shells, so you want to put the URL in quotes, '
            u'like  youtube-dl '
            u'\'http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc\''
            u' (or simply  youtube-dl BaW_jenozKc  ).',
            expected=True)