14 import xml.etree.ElementTree
17 from .common import InfoExtractor, SearchInfoExtractor
18 from .subtitles import SubtitlesInfoExtractor
25 compat_urllib_request,
38 class YoutubeBaseInfoExtractor(InfoExtractor):
39 """Provide base functions for Youtube extractors"""
40 _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
41 _LANG_URL = r'https://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
42 _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
43 _NETRC_MACHINE = 'youtube'
44 # If True it will raise an error if no login info is provided
45 _LOGIN_REQUIRED = False
def report_lang(self):
    """Announce that the site language is about to be set."""
    self.to_screen(u'Setting language')
51 def _set_language(self):
52 request = compat_urllib_request.Request(self._LANG_URL)
55 compat_urllib_request.urlopen(request).read()
56 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
57 self._downloader.report_warning(u'unable to set language: %s' % compat_str(err))
62 (username, password) = self._get_login_info()
63 # No authentication to be performed
65 if self._LOGIN_REQUIRED:
66 raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True)
69 request = compat_urllib_request.Request(self._LOGIN_URL)
71 login_page = compat_urllib_request.urlopen(request).read().decode('utf-8')
72 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
73 self._downloader.report_warning(u'unable to fetch login page: %s' % compat_str(err))
78 match = re.search(re.compile(r'<input.+?name="GALX".+?value="(.+?)"', re.DOTALL), login_page)
81 match = re.search(re.compile(r'<input.+?name="dsh".+?value="(.+?)"', re.DOTALL), login_page)
87 u'continue': u'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
91 u'PersistentCookie': u'yes',
93 u'bgresponse': u'js_disabled',
94 u'checkConnection': u'',
95 u'checkedDomains': u'youtube',
101 u'signIn': u'Sign in',
103 u'service': u'youtube',
107 # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
109 login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k,v in login_form_strs.items())
110 login_data = compat_urllib_parse.urlencode(login_form).encode('ascii')
111 request = compat_urllib_request.Request(self._LOGIN_URL, login_data)
114 login_results = compat_urllib_request.urlopen(request).read().decode('utf-8')
115 if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
116 self._downloader.report_warning(u'unable to log in: bad username or password')
118 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
119 self._downloader.report_warning(u'unable to log in: %s' % compat_str(err))
123 def _confirm_age(self):
126 'action_confirm': 'Confirm',
128 request = compat_urllib_request.Request(self._AGE_URL, compat_urllib_parse.urlencode(age_form))
130 self.report_age_confirmation()
131 compat_urllib_request.urlopen(request).read().decode('utf-8')
132 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
133 raise ExtractorError(u'Unable to confirm age: %s' % compat_str(err))
136 def _real_initialize(self):
137 if self._downloader is None:
139 if not self._set_language():
141 if not self._login():
146 class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
147 IE_DESC = u'YouTube.com'
150 (?:https?://)? # http(s):// (optional)
151 (?:(?:(?:(?:\w+\.)?youtube(?:-nocookie)?\.com/|
152 tube\.majestyc\.net/|
153 youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
154 (?:.*?\#/)? # handle anchor (#/) redirect urls
155 (?: # the various things that can precede the ID:
156 (?:(?:v|embed|e)/) # v/ or embed/ or e/
157 |(?: # or the v= param in all its forms
158 (?:(?:watch|movie)(?:_popup)?(?:\.php)?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
159 (?:\?|\#!?) # the params delimiter ? or # or #!
160 (?:.*?&)? # any other preceding param (like /?s=tuff&v=xxxx)
164 |youtu\.be/ # just youtu.be/xxxx
166 )? # all until now is optional -> you can pass the naked ID
167 ([0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
168 (?(1).+)? # if we found the ID, everything can follow
170 _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
171 # Listed in order of quality
172 _available_formats = ['38', '37', '46', '22', '45', '35', '44', '34', '18', '43', '6', '5', '36', '17', '13',
173 # Apple HTTP Live Streaming
174 '96', '95', '94', '93', '92', '132', '151',
176 '85', '84', '102', '83', '101', '82', '100',
178 '138', '137', '248', '136', '247', '135', '246',
179 '245', '244', '134', '243', '133', '242', '160',
181 '141', '172', '140', '171', '139',
183 _available_formats_prefer_free = ['38', '46', '37', '45', '22', '44', '35', '43', '34', '18', '6', '5', '36', '17', '13',
184 # Apple HTTP Live Streaming
185 '96', '95', '94', '93', '92', '132', '151',
187 '85', '102', '84', '101', '83', '100', '82',
189 '138', '248', '137', '247', '136', '246', '245',
190 '244', '135', '243', '134', '242', '133', '160',
192 '172', '141', '171', '140', '139',
194 _video_formats_map = {
195 'flv': ['35', '34', '6', '5'],
196 '3gp': ['36', '17', '13'],
197 'mp4': ['38', '37', '22', '18'],
198 'webm': ['46', '45', '44', '43'],
200 _video_extensions = {
222 # Apple HTTP Live Streaming
254 _video_dimensions = {
336 u"url": u"http://www.youtube.com/watch?v=BaW_jenozKc",
337 u"file": u"BaW_jenozKc.mp4",
339 u"title": u"youtube-dl test video \"'/\\ä↭𝕐",
340 u"uploader": u"Philipp Hagemeister",
341 u"uploader_id": u"phihag",
342 u"upload_date": u"20121002",
343 u"description": u"test chars: \"'/\\ä↭𝕐\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de ."
347 u"url": u"http://www.youtube.com/watch?v=1ltcDfZMA3U",
348 u"file": u"1ltcDfZMA3U.flv",
349 u"note": u"Test VEVO video (#897)",
351 u"upload_date": u"20070518",
352 u"title": u"Maps - It Will Find You",
353 u"description": u"Music video by Maps performing It Will Find You.",
354 u"uploader": u"MuteUSA",
355 u"uploader_id": u"MuteUSA"
359 u"url": u"http://www.youtube.com/watch?v=UxxajLWwzqY",
360 u"file": u"UxxajLWwzqY.mp4",
361 u"note": u"Test generic use_cipher_signature video (#897)",
363 u"upload_date": u"20120506",
364 u"title": u"Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]",
365 u"description": u"md5:5b292926389560516e384ac437c0ec07",
366 u"uploader": u"Icona Pop",
367 u"uploader_id": u"IconaPop"
371 u"url": u"https://www.youtube.com/watch?v=07FYdnEawAQ",
372 u"file": u"07FYdnEawAQ.mp4",
373 u"note": u"Test VEVO video with age protection (#956)",
375 u"upload_date": u"20130703",
376 u"title": u"Justin Timberlake - Tunnel Vision (Explicit)",
377 u"description": u"md5:64249768eec3bc4276236606ea996373",
378 u"uploader": u"justintimberlakeVEVO",
379 u"uploader_id": u"justintimberlakeVEVO"
def suitable(cls, url):
    """Return True when this extractor should handle *url*.

    Playlist URLs are left for YoutubePlaylistIE to claim.
    """
    if YoutubePlaylistIE.suitable(url):
        return False
    match = re.match(cls._VALID_URL, url, re.VERBOSE)
    return match is not None
def __init__(self, *args, **kwargs):
    """Initialize the extractor and an empty per-player signature cache."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # Maps player URL -> extracted signature-decryption function.
    self._player_cache = {}
def report_video_webpage_download(self, video_id):
    """Announce that the video webpage is being downloaded."""
    msg = u'%s: Downloading video webpage' % video_id
    self.to_screen(msg)
def report_video_info_webpage_download(self, video_id):
    """Announce that the video info webpage is being downloaded."""
    msg = u'%s: Downloading video info webpage' % video_id
    self.to_screen(msg)
def report_information_extraction(self, video_id):
    """Announce that video information is being extracted."""
    msg = u'%s: Extracting video information' % video_id
    self.to_screen(msg)
def report_unavailable_format(self, video_id, format):
    """Announce that the requested format is not available for this video."""
    msg = u'%s: Format %s not available' % (video_id, format)
    self.to_screen(msg)
def report_rtmp_download(self):
    """Announce that the download will use the RTMP protocol."""
    self.to_screen(u'RTMP download detected')
415 def _extract_signature_function(self, video_id, player_url, slen):
416 id_m = re.match(r'.*-(?P<id>[a-zA-Z0-9_-]+)\.(?P<ext>[a-z]+)$',
418 player_type = id_m.group('ext')
419 player_id = id_m.group('id')
421 # Read from filesystem cache
422 func_id = '%s_%s_%d' % (player_type, player_id, slen)
423 assert os.path.basename(func_id) == func_id
424 xdg_cache_home = os.environ.get('XDG_CACHE_HOME')
426 userCacheDir = os.path.join(xdg_cache_home, 'youtube-dl')
428 userCacheDir = os.path.join(os.path.expanduser('~'), '.cache', 'youtube-dl')
429 cache_dir = self._downloader.params.get('cachedir', userCacheDir)
431 cache_enabled = cache_dir is not None
433 cache_fn = os.path.join(os.path.expanduser(cache_dir),
437 with io.open(cache_fn, 'r', encoding='utf-8') as cachef:
438 cache_spec = json.load(cachef)
439 return lambda s: u''.join(s[i] for i in cache_spec)
441 pass # No cache available
443 if player_type == 'js':
444 code = self._download_webpage(
445 player_url, video_id,
446 note=u'Downloading %s player %s' % (player_type, player_id),
447 errnote=u'Download of %s failed' % player_url)
448 res = self._parse_sig_js(code)
449 elif player_type == 'swf':
450 urlh = self._request_webpage(
451 player_url, video_id,
452 note=u'Downloading %s player %s' % (player_type, player_id),
453 errnote=u'Download of %s failed' % player_url)
455 res = self._parse_sig_swf(code)
457 assert False, 'Invalid player type %r' % player_type
461 test_string = u''.join(map(compat_chr, range(slen)))
462 cache_res = res(test_string)
463 cache_spec = [ord(c) for c in cache_res]
465 os.makedirs(os.path.dirname(cache_fn))
466 except OSError as ose:
467 if ose.errno != errno.EEXIST:
469 write_json_file(cache_spec, cache_fn)
471 tb = traceback.format_exc()
472 self._downloader.report_warning(
473 u'Writing cache to %r failed: %s' % (cache_fn, tb))
477 def _print_sig_code(self, func, slen):
478 def gen_sig_code(idxs):
479 def _genslice(start, end, step):
480 starts = u'' if start == 0 else str(start)
481 ends = (u':%d' % (end+step)) if end + step >= 0 else u':'
482 steps = u'' if step == 1 else (u':%d' % step)
483 return u's[%s%s%s]' % (starts, ends, steps)
486 start = '(Never used)' # Quelch pyflakes warnings - start will be
487 # set as soon as step is set
488 for i, prev in zip(idxs[1:], idxs[:-1]):
492 yield _genslice(start, prev, step)
495 if i - prev in [-1, 1]:
500 yield u's[%d]' % prev
504 yield _genslice(start, i, step)
506 test_string = u''.join(map(compat_chr, range(slen)))
507 cache_res = func(test_string)
508 cache_spec = [ord(c) for c in cache_res]
509 expr_code = u' + '.join(gen_sig_code(cache_spec))
510 code = u'if len(s) == %d:\n return %s\n' % (slen, expr_code)
511 self.to_screen(u'Extracted signature function:\n' + code)
513 def _parse_sig_js(self, jscode):
514 funcname = self._search_regex(
515 r'signature=([a-zA-Z]+)', jscode,
516 u'Initial JS player signature function name')
521 return string.lowercase.index(varname)
523 def interpret_statement(stmt, local_vars, allow_recursion=20):
524 if allow_recursion < 0:
525 raise ExtractorError(u'Recursion limit reached')
527 if stmt.startswith(u'var '):
528 stmt = stmt[len(u'var '):]
529 ass_m = re.match(r'^(?P<out>[a-z]+)(?:\[(?P<index>[^\]]+)\])?' +
530 r'=(?P<expr>.*)$', stmt)
532 if ass_m.groupdict().get('index'):
534 lvar = local_vars[ass_m.group('out')]
535 idx = interpret_expression(ass_m.group('index'),
536 local_vars, allow_recursion)
537 assert isinstance(idx, int)
540 expr = ass_m.group('expr')
543 local_vars[ass_m.group('out')] = val
545 expr = ass_m.group('expr')
546 elif stmt.startswith(u'return '):
548 expr = stmt[len(u'return '):]
550 raise ExtractorError(
551 u'Cannot determine left side of statement in %r' % stmt)
553 v = interpret_expression(expr, local_vars, allow_recursion)
556 def interpret_expression(expr, local_vars, allow_recursion):
561 return local_vars[expr]
563 m = re.match(r'^(?P<in>[a-z]+)\.(?P<member>.*)$', expr)
565 member = m.group('member')
566 val = local_vars[m.group('in')]
567 if member == 'split("")':
569 if member == 'join("")':
571 if member == 'length':
573 if member == 'reverse()':
575 slice_m = re.match(r'slice\((?P<idx>.*)\)', member)
577 idx = interpret_expression(
578 slice_m.group('idx'), local_vars, allow_recursion-1)
582 r'^(?P<in>[a-z]+)\[(?P<idx>.+)\]$', expr)
584 val = local_vars[m.group('in')]
585 idx = interpret_expression(m.group('idx'), local_vars,
589 m = re.match(r'^(?P<a>.+?)(?P<op>[%])(?P<b>.+?)$', expr)
591 a = interpret_expression(m.group('a'),
592 local_vars, allow_recursion)
593 b = interpret_expression(m.group('b'),
594 local_vars, allow_recursion)
598 r'^(?P<func>[a-zA-Z]+)\((?P<args>[a-z0-9,]+)\)$', expr)
600 fname = m.group('func')
601 if fname not in functions:
602 functions[fname] = extract_function(fname)
603 argvals = [int(v) if v.isdigit() else local_vars[v]
604 for v in m.group('args').split(',')]
605 return functions[fname](argvals)
606 raise ExtractorError(u'Unsupported JS expression %r' % expr)
608 def extract_function(funcname):
610 r'function ' + re.escape(funcname) +
611 r'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}',
613 argnames = func_m.group('args').split(',')
616 local_vars = dict(zip(argnames, args))
617 for stmt in func_m.group('code').split(';'):
618 res = interpret_statement(stmt, local_vars)
622 initial_function = extract_function(funcname)
623 return lambda s: initial_function([s])
625 def _parse_sig_swf(self, file_contents):
626 if file_contents[1:3] != b'WS':
627 raise ExtractorError(
628 u'Not an SWF file; header is %r' % file_contents[:3])
629 if file_contents[:1] == b'C':
630 content = zlib.decompress(file_contents[8:])
632 raise NotImplementedError(u'Unsupported compression format %r' %
635 def extract_tags(content):
637 while pos < len(content):
638 header16 = struct.unpack('<H', content[pos:pos+2])[0]
640 tag_code = header16 >> 6
641 tag_len = header16 & 0x3f
643 tag_len = struct.unpack('<I', content[pos:pos+4])[0]
645 assert pos+tag_len <= len(content)
646 yield (tag_code, content[pos:pos+tag_len])
650 for tag_code, tag in extract_tags(content)
652 p = code_tag.index(b'\0', 4) + 1
653 code_reader = io.BytesIO(code_tag[p:])
655 # Parse ABC (AVM2 ByteCode)
656 def read_int(reader=None):
664 b = struct.unpack('<B', buf)[0]
665 res = res | ((b & 0x7f) << shift)
671 def u30(reader=None):
672 res = read_int(reader)
673 assert res & 0xf0000000 == 0
677 def s32(reader=None):
679 if v & 0x80000000 != 0:
680 v = - ((v ^ 0xffffffff) + 1)
683 def read_string(reader=None):
687 resb = reader.read(slen)
688 assert len(resb) == slen
689 return resb.decode('utf-8')
691 def read_bytes(count, reader=None):
694 resb = reader.read(count)
695 assert len(resb) == count
698 def read_byte(reader=None):
699 resb = read_bytes(1, reader=reader)
700 res = struct.unpack('<B', resb)[0]
703 # minor_version + major_version
708 for _c in range(1, int_count):
711 for _c in range(1, uint_count):
714 read_bytes((double_count-1) * 8)
716 constant_strings = [u'']
717 for _c in range(1, string_count):
719 constant_strings.append(s)
720 namespace_count = u30()
721 for _c in range(1, namespace_count):
725 for _c in range(1, ns_set_count):
727 for _c2 in range(count):
729 multiname_count = u30()
738 0x0e: 2, # MultinameA
739 0x1b: 1, # MultinameL
740 0x1c: 1, # MultinameLA
743 for _c in range(1, multiname_count):
745 assert kind in MULTINAME_SIZES, u'Invalid multiname kind %r' % kind
747 u30() # namespace_idx
749 multinames.append(constant_strings[name_idx])
751 multinames.append('[MULTINAME kind: %d]' % kind)
752 for _c2 in range(MULTINAME_SIZES[kind]):
757 MethodInfo = collections.namedtuple(
759 ['NEED_ARGUMENTS', 'NEED_REST'])
761 for method_id in range(method_count):
764 for _ in range(param_count):
766 u30() # name index (always 0 for youtube)
768 if flags & 0x08 != 0:
771 for c in range(option_count):
774 if flags & 0x80 != 0:
775 # Param names present
776 for _ in range(param_count):
778 mi = MethodInfo(flags & 0x01 != 0, flags & 0x04 != 0)
779 method_infos.append(mi)
782 metadata_count = u30()
783 for _c in range(metadata_count):
786 for _c2 in range(item_count):
790 def parse_traits_info():
791 trait_name_idx = u30()
792 kind_full = read_byte()
793 kind = kind_full & 0x0f
794 attrs = kind_full >> 4
796 if kind in [0x00, 0x06]: # Slot or Const
798 u30() # type_name_idx
802 elif kind in [0x01, 0x02, 0x03]: # Method / Getter / Setter
805 methods[multinames[trait_name_idx]] = method_idx
806 elif kind == 0x04: # Class
809 elif kind == 0x05: # Function
812 methods[function_idx] = multinames[trait_name_idx]
814 raise ExtractorError(u'Unsupported trait kind %d' % kind)
816 if attrs & 0x4 != 0: # Metadata present
817 metadata_count = u30()
818 for _c3 in range(metadata_count):
819 u30() # metadata index
824 TARGET_CLASSNAME = u'SignatureDecipher'
825 searched_idx = multinames.index(TARGET_CLASSNAME)
826 searched_class_id = None
828 for class_id in range(class_count):
830 if name_idx == searched_idx:
831 # We found the class we're looking for!
832 searched_class_id = class_id
833 u30() # super_name idx
835 if flags & 0x08 != 0: # Protected namespace is present
836 u30() # protected_ns_idx
838 for _c2 in range(intrf_count):
842 for _c2 in range(trait_count):
845 if searched_class_id is None:
846 raise ExtractorError(u'Target class %r not found' %
851 for class_id in range(class_count):
854 for _c2 in range(trait_count):
855 trait_methods = parse_traits_info()
856 if class_id == searched_class_id:
857 method_names.update(trait_methods.items())
858 method_idxs.update(dict(
860 for name, idx in trait_methods.items()))
864 for _c in range(script_count):
867 for _c2 in range(trait_count):
871 method_body_count = u30()
872 Method = collections.namedtuple('Method', ['code', 'local_count'])
874 for _c in range(method_body_count):
878 u30() # init_scope_depth
879 u30() # max_scope_depth
881 code = read_bytes(code_length)
882 if method_idx in method_idxs:
883 m = Method(code, local_count)
884 methods[method_idxs[method_idx]] = m
885 exception_count = u30()
886 for _c2 in range(exception_count):
893 for _c2 in range(trait_count):
896 assert p + code_reader.tell() == len(code_tag)
897 assert len(methods) == len(method_idxs)
899 method_pyfunctions = {}
901 def extract_function(func_name):
902 if func_name in method_pyfunctions:
903 return method_pyfunctions[func_name]
904 if func_name not in methods:
905 raise ExtractorError(u'Cannot find function %r' % func_name)
906 m = methods[func_name]
909 registers = ['(this)'] + list(args) + [None] * m.local_count
911 coder = io.BytesIO(m.code)
913 opcode = struct.unpack('!B', coder.read(1))[0]
914 if opcode == 36: # pushbyte
915 v = struct.unpack('!B', coder.read(1))[0]
917 elif opcode == 44: # pushstring
919 stack.append(constant_strings[idx])
920 elif opcode == 48: # pushscope
921 # We don't implement the scope register, so we'll just
922 # ignore the popped value
924 elif opcode == 70: # callproperty
926 mname = multinames[index]
927 arg_count = u30(coder)
928 args = list(reversed(
929 [stack.pop() for _ in range(arg_count)]))
931 if mname == u'split':
932 assert len(args) == 1
933 assert isinstance(args[0], compat_str)
934 assert isinstance(obj, compat_str)
938 res = obj.split(args[0])
940 elif mname == u'slice':
941 assert len(args) == 1
942 assert isinstance(args[0], int)
943 assert isinstance(obj, list)
946 elif mname == u'join':
947 assert len(args) == 1
948 assert isinstance(args[0], compat_str)
949 assert isinstance(obj, list)
950 res = args[0].join(obj)
952 elif mname in method_pyfunctions:
953 stack.append(method_pyfunctions[mname](args))
955 raise NotImplementedError(
956 u'Unsupported property %r on %r'
958 elif opcode == 72: # returnvalue
961 elif opcode == 79: # callpropvoid
963 mname = multinames[index]
964 arg_count = u30(coder)
965 args = list(reversed(
966 [stack.pop() for _ in range(arg_count)]))
968 if mname == u'reverse':
969 assert isinstance(obj, list)
972 raise NotImplementedError(
973 u'Unsupported (void) property %r on %r'
975 elif opcode == 93: # findpropstrict
977 mname = multinames[index]
978 res = extract_function(mname)
980 elif opcode == 97: # setproperty
985 assert isinstance(obj, list)
986 assert isinstance(idx, int)
988 elif opcode == 98: # getlocal
990 stack.append(registers[index])
991 elif opcode == 99: # setlocal
994 registers[index] = value
995 elif opcode == 102: # getproperty
997 pname = multinames[index]
998 if pname == u'length':
1000 assert isinstance(obj, list)
1001 stack.append(len(obj))
1002 else: # Assume attribute access
1004 assert isinstance(idx, int)
1006 assert isinstance(obj, list)
1007 stack.append(obj[idx])
1008 elif opcode == 128: # coerce
1010 elif opcode == 133: # coerce_s
1011 assert isinstance(stack[-1], (type(None), compat_str))
1012 elif opcode == 164: # modulo
1013 value2 = stack.pop()
1014 value1 = stack.pop()
1015 res = value1 % value2
1017 elif opcode == 208: # getlocal_0
1018 stack.append(registers[0])
1019 elif opcode == 209: # getlocal_1
1020 stack.append(registers[1])
1021 elif opcode == 210: # getlocal_2
1022 stack.append(registers[2])
1023 elif opcode == 211: # getlocal_3
1024 stack.append(registers[3])
1025 elif opcode == 214: # setlocal_2
1026 registers[2] = stack.pop()
1027 elif opcode == 215: # setlocal_3
1028 registers[3] = stack.pop()
1030 raise NotImplementedError(
1031 u'Unsupported opcode %d' % opcode)
1033 method_pyfunctions[func_name] = resfunc
1036 initial_function = extract_function(u'decipher')
1037 return lambda s: initial_function([s])
1039 def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
1040 """Turn the encrypted s field into a working signature"""
1042 if player_url is not None:
1044 if player_url not in self._player_cache:
1045 func = self._extract_signature_function(
1046 video_id, player_url, len(s)
1048 self._player_cache[player_url] = func
1049 func = self._player_cache[player_url]
1050 if self._downloader.params.get('youtube_print_sig_code'):
1051 self._print_sig_code(func, len(s))
1054 tb = traceback.format_exc()
1055 self._downloader.report_warning(
1056 u'Automatic signature extraction failed: ' + tb)
1058 self._downloader.report_warning(
1059 u'Warning: Falling back to static signature algorithm')
1061 return self._static_decrypt_signature(
1062 s, video_id, player_url, age_gate)
1064 def _static_decrypt_signature(self, s, video_id, player_url, age_gate):
1066 # The videos with age protection use another player, so the
1067 # algorithms can be different.
1069 return s[2:63] + s[82] + s[64:82] + s[63]
1072 return s[86:29:-1] + s[88] + s[28:5:-1]
1074 return s[25] + s[3:25] + s[0] + s[26:42] + s[79] + s[43:79] + s[91] + s[80:83]
1076 return s[84:27:-1] + s[86] + s[26:5:-1]
1078 return s[25] + s[3:25] + s[2] + s[26:40] + s[77] + s[41:77] + s[89] + s[78:81]
1080 return s[84:78:-1] + s[87] + s[77:60:-1] + s[0] + s[59:3:-1]
1082 return s[7:28] + s[87] + s[29:45] + s[55] + s[46:55] + s[2] + s[56:87] + s[28]
1084 return s[6:27] + s[4] + s[28:39] + s[27] + s[40:59] + s[2] + s[60:]
1086 return s[80:72:-1] + s[16] + s[71:39:-1] + s[72] + s[38:16:-1] + s[82] + s[15::-1]
1088 return s[3:11] + s[0] + s[12:55] + s[84] + s[56:84]
1090 return s[78:70:-1] + s[14] + s[69:37:-1] + s[70] + s[36:14:-1] + s[80] + s[:14][::-1]
1092 return s[80:63:-1] + s[0] + s[62:0:-1] + s[63]
1094 return s[12] + s[79:12:-1] + s[80] + s[11::-1]
1096 return s[56] + s[79:56:-1] + s[41] + s[55:41:-1] + s[80] + s[40:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
1098 return s[1:19] + s[0] + s[20:68] + s[19] + s[69:80]
1100 return s[54] + s[77:54:-1] + s[39] + s[53:39:-1] + s[78] + s[38:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
1103 raise ExtractorError(u'Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s)))
1105 def _get_available_subtitles(self, video_id):
1107 sub_list = self._download_webpage(
1108 'http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
1109 video_id, note=False)
1110 except ExtractorError as err:
1111 self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err))
1113 lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list)
1118 params = compat_urllib_parse.urlencode({
1121 'fmt': self._downloader.params.get('subtitlesformat'),
1123 url = u'http://www.youtube.com/api/timedtext?' + params
1124 sub_lang_list[lang] = url
1125 if not sub_lang_list:
1126 self._downloader.report_warning(u'video doesn\'t have subtitles')
1128 return sub_lang_list
1130 def _get_available_automatic_caption(self, video_id, webpage):
1131 """We need the webpage for getting the captions url, pass it as an
1132 argument to speed up the process."""
1133 sub_format = self._downloader.params.get('subtitlesformat')
1134 self.to_screen(u'%s: Looking for automatic captions' % video_id)
1135 mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
1136 err_msg = u'Couldn\'t find automatic captions for %s' % video_id
1138 self._downloader.report_warning(err_msg)
1140 player_config = json.loads(mobj.group(1))
1142 args = player_config[u'args']
1143 caption_url = args[u'ttsurl']
1144 timestamp = args[u'timestamp']
1145 # We get the available subtitles
1146 list_params = compat_urllib_parse.urlencode({
1151 list_url = caption_url + '&' + list_params
1152 list_page = self._download_webpage(list_url, video_id)
1153 caption_list = xml.etree.ElementTree.fromstring(list_page.encode('utf-8'))
1154 original_lang_node = caption_list.find('track')
1155 if original_lang_node.attrib.get('kind') != 'asr' :
1156 self._downloader.report_warning(u'Video doesn\'t have automatic captions')
1158 original_lang = original_lang_node.attrib['lang_code']
1161 for lang_node in caption_list.findall('target'):
1162 sub_lang = lang_node.attrib['lang_code']
1163 params = compat_urllib_parse.urlencode({
1164 'lang': original_lang,
1170 sub_lang_list[sub_lang] = caption_url + '&' + params
1171 return sub_lang_list
1172 # An extractor error can be raised by the download process if there are
1173 # no automatic captions but there are subtitles
1174 except (KeyError, ExtractorError):
1175 self._downloader.report_warning(err_msg)
1178 def _print_formats(self, formats):
1179 print('Available formats:')
1181 print('%s\t:\t%s\t[%s]%s' %(x, self._video_extensions.get(x, 'flv'),
1182 self._video_dimensions.get(x, '???'),
1183 ' ('+self._special_itags[x]+')' if x in self._special_itags else ''))
1185 def _extract_id(self, url):
1186 mobj = re.match(self._VALID_URL, url, re.VERBOSE)
1188 raise ExtractorError(u'Invalid URL: %s' % url)
1189 video_id = mobj.group(2)
1192 def _get_video_url_list(self, url_map):
1194 Transform a dictionary in the format {itag:url} to a list of (itag, url)
1195 with the requested formats.
1197 req_format = self._downloader.params.get('format', None)
1198 format_limit = self._downloader.params.get('format_limit', None)
1199 available_formats = self._available_formats_prefer_free if self._downloader.params.get('prefer_free_formats', False) else self._available_formats
1200 if format_limit is not None and format_limit in available_formats:
1201 format_list = available_formats[available_formats.index(format_limit):]
1203 format_list = available_formats
1204 existing_formats = [x for x in format_list if x in url_map]
1205 if len(existing_formats) == 0:
1206 raise ExtractorError(u'no known formats available for video')
1207 if self._downloader.params.get('listformats', None):
1208 self._print_formats(existing_formats)
1210 if req_format is None or req_format == 'best':
1211 video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
1212 elif req_format == 'worst':
1213 video_url_list = [(existing_formats[-1], url_map[existing_formats[-1]])] # worst quality
1214 elif req_format in ('-1', 'all'):
1215 video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
1217 # Specific formats. We pick the first in a slash-delimited sequence.
1218 # Format can be specified as itag or 'mp4' or 'flv' etc. We pick the highest quality
1219 # available in the specified format. For example,
1220 # if '1/2/3/4' is requested and '2' and '4' are available, we pick '2'.
1221 # if '1/mp4/3/4' is requested and '1' and '5' (is a mp4) are available, we pick '1'.
1222 # if '1/mp4/3/4' is requested and '4' and '5' (is a mp4) are available, we pick '5'.
1223 req_formats = req_format.split('/')
1224 video_url_list = None
1225 for rf in req_formats:
1227 video_url_list = [(rf, url_map[rf])]
1229 if rf in self._video_formats_map:
1230 for srf in self._video_formats_map[rf]:
1232 video_url_list = [(srf, url_map[srf])]
1237 if video_url_list is None:
1238 raise ExtractorError(u'requested format not available')
1239 return video_url_list
1241 def _extract_from_m3u8(self, manifest_url, video_id):
1243 def _get_urls(_manifest):
1244 lines = _manifest.split('\n')
1245 urls = filter(lambda l: l and not l.startswith('#'),
1248 manifest = self._download_webpage(manifest_url, video_id, u'Downloading formats manifest')
1249 formats_urls = _get_urls(manifest)
1250 for format_url in formats_urls:
1251 itag = self._search_regex(r'itag/(\d+?)/', format_url, 'itag')
1252 url_map[itag] = format_url
1255 def _real_extract(self, url):
1256 if re.match(r'(?:https?://)?[^/]+/watch\?feature=[a-z_]+$', url):
1257 self._downloader.report_warning(u'Did you forget to quote the URL? Remember that & is a meta-character in most shells, so you want to put the URL in quotes, like youtube-dl \'http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc\' (or simply youtube-dl BaW_jenozKc ).')
1259 # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1260 mobj = re.search(self._NEXT_URL_RE, url)
1262 url = 'https://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/')
1263 video_id = self._extract_id(url)
1266 self.report_video_webpage_download(video_id)
1267 url = 'https://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
1268 request = compat_urllib_request.Request(url)
1270 video_webpage_bytes = compat_urllib_request.urlopen(request).read()
1271 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1272 raise ExtractorError(u'Unable to download video webpage: %s' % compat_str(err))
1274 video_webpage = video_webpage_bytes.decode('utf-8', 'ignore')
1276 # Attempt to extract SWF player URL
1277 mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
1278 if mobj is not None:
1279 player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1284 self.report_video_info_webpage_download(video_id)
1285 if re.search(r'player-age-gate-content">', video_webpage) is not None:
1286 self.report_age_confirmation()
1288 # We simulate the access to the video from www.youtube.com/v/{video_id}
1289 # this can be viewed without login into Youtube
1290 data = compat_urllib_parse.urlencode({'video_id': video_id,
1294 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1298 video_info_url = 'https://www.youtube.com/get_video_info?' + data
1299 video_info_webpage = self._download_webpage(video_info_url, video_id,
1301 errnote='unable to download video info webpage')
1302 video_info = compat_parse_qs(video_info_webpage)
1305 for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
1306 video_info_url = ('https://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
1307 % (video_id, el_type))
1308 video_info_webpage = self._download_webpage(video_info_url, video_id,
1310 errnote='unable to download video info webpage')
1311 video_info = compat_parse_qs(video_info_webpage)
1312 if 'token' in video_info:
1314 if 'token' not in video_info:
1315 if 'reason' in video_info:
1316 raise ExtractorError(u'YouTube said: %s' % video_info['reason'][0], expected=True)
1318 raise ExtractorError(u'"token" parameter not in video info for unknown reason')
1320 # Check for "rental" videos
1321 if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
1322 raise ExtractorError(u'"rental" videos not supported')
1324 # Start extracting information
1325 self.report_information_extraction(video_id)
1328 if 'author' not in video_info:
1329 raise ExtractorError(u'Unable to extract uploader name')
1330 video_uploader = compat_urllib_parse.unquote_plus(video_info['author'][0])
1333 video_uploader_id = None
1334 mobj = re.search(r'<link itemprop="url" href="http://www.youtube.com/(?:user|channel)/([^"]+)">', video_webpage)
1335 if mobj is not None:
1336 video_uploader_id = mobj.group(1)
1338 self._downloader.report_warning(u'unable to extract uploader nickname')
1341 if 'title' in video_info:
1342 video_title = compat_urllib_parse.unquote_plus(video_info['title'][0])
1344 self._downloader.report_warning(u'Unable to extract video title')
1348 # We try first to get a high quality image:
1349 m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
1350 video_webpage, re.DOTALL)
1351 if m_thumb is not None:
1352 video_thumbnail = m_thumb.group(1)
1353 elif 'thumbnail_url' not in video_info:
1354 self._downloader.report_warning(u'unable to extract video thumbnail')
1355 video_thumbnail = None
1356 else: # don't panic if we can't find it
1357 video_thumbnail = compat_urllib_parse.unquote_plus(video_info['thumbnail_url'][0])
1361 mobj = re.search(r'id="eow-date.*?>(.*?)</span>', video_webpage, re.DOTALL)
1362 if mobj is not None:
1363 upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
1364 upload_date = unified_strdate(upload_date)
1367 video_description = get_element_by_id("eow-description", video_webpage)
1368 if video_description:
1369 video_description = clean_html(video_description)
1371 fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
1373 video_description = unescapeHTML(fd_mobj.group(1))
1375 video_description = u''
1378 video_subtitles = self.extract_subtitles(video_id, video_webpage)
1380 if self._downloader.params.get('listsubtitles', False):
1381 self._list_available_subtitles(video_id, video_webpage)
1384 if 'length_seconds' not in video_info:
1385 self._downloader.report_warning(u'unable to extract video duration')
1388 video_duration = compat_urllib_parse.unquote_plus(video_info['length_seconds'][0])
1390 # Decide which formats to download
1393 mobj = re.search(r';ytplayer.config = ({.*?});', video_webpage)
1395 raise ValueError('Could not find vevo ID')
1396 info = json.loads(mobj.group(1))
1398 # Easy way to know if the 's' value is in url_encoded_fmt_stream_map
1399 # this signatures are encrypted
1400 if 'url_encoded_fmt_stream_map' not in args:
1401 raise ValueError(u'No stream_map present') # caught below
1402 m_s = re.search(r'[&,]s=', args['url_encoded_fmt_stream_map'])
1404 self.to_screen(u'%s: Encrypted signatures detected.' % video_id)
1405 video_info['url_encoded_fmt_stream_map'] = [args['url_encoded_fmt_stream_map']]
1406 m_s = re.search(r'[&,]s=', args.get('adaptive_fmts', u''))
1408 if 'url_encoded_fmt_stream_map' in video_info:
1409 video_info['url_encoded_fmt_stream_map'][0] += ',' + args['adaptive_fmts']
1411 video_info['url_encoded_fmt_stream_map'] = [args['adaptive_fmts']]
1412 elif 'adaptive_fmts' in video_info:
1413 if 'url_encoded_fmt_stream_map' in video_info:
1414 video_info['url_encoded_fmt_stream_map'][0] += ',' + video_info['adaptive_fmts'][0]
1416 video_info['url_encoded_fmt_stream_map'] = video_info['adaptive_fmts']
1420 if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1421 self.report_rtmp_download()
1422 video_url_list = [(None, video_info['conn'][0])]
1423 elif 'url_encoded_fmt_stream_map' in video_info and len(video_info['url_encoded_fmt_stream_map']) >= 1:
1424 if 'rtmpe%3Dyes' in video_info['url_encoded_fmt_stream_map'][0]:
1425 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
1427 for url_data_str in video_info['url_encoded_fmt_stream_map'][0].split(','):
1428 url_data = compat_parse_qs(url_data_str)
1429 if 'itag' in url_data and 'url' in url_data:
1430 url = url_data['url'][0]
1431 if 'sig' in url_data:
1432 url += '&signature=' + url_data['sig'][0]
1433 elif 's' in url_data:
1434 encrypted_sig = url_data['s'][0]
1435 if self._downloader.params.get('verbose'):
1437 if player_url is None:
1438 player_version = 'unknown'
1440 player_version = self._search_regex(
1441 r'-(.+)\.swf$', player_url,
1442 u'flash player', fatal=False)
1443 player_desc = 'flash player %s' % player_version
1445 player_version = self._search_regex(
1446 r'html5player-(.+?)\.js', video_webpage,
1447 'html5 player', fatal=False)
1448 player_desc = u'html5 player %s' % player_version
1450 parts_sizes = u'.'.join(compat_str(len(part)) for part in encrypted_sig.split('.'))
1451 self.to_screen(u'encrypted signature length %d (%s), itag %s, %s' %
1452 (len(encrypted_sig), parts_sizes, url_data['itag'][0], player_desc))
1455 jsplayer_url_json = self._search_regex(
1456 r'"assets":.+?"js":\s*("[^"]+")',
1457 video_webpage, u'JS player URL')
1458 player_url = json.loads(jsplayer_url_json)
1460 signature = self._decrypt_signature(
1461 encrypted_sig, video_id, player_url, age_gate)
1462 url += '&signature=' + signature
1463 if 'ratebypass' not in url:
1464 url += '&ratebypass=yes'
1465 url_map[url_data['itag'][0]] = url
1466 video_url_list = self._get_video_url_list(url_map)
1467 if not video_url_list:
1469 elif video_info.get('hlsvp'):
1470 manifest_url = video_info['hlsvp'][0]
1471 url_map = self._extract_from_m3u8(manifest_url, video_id)
1472 video_url_list = self._get_video_url_list(url_map)
1473 if not video_url_list:
1477 raise ExtractorError(u'no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
1480 for format_param, video_real_url in video_url_list:
1482 video_extension = self._video_extensions.get(format_param, 'flv')
1484 video_format = '{0} - {1}{2}'.format(format_param if format_param else video_extension,
1485 self._video_dimensions.get(format_param, '???'),
1486 ' ('+self._special_itags[format_param]+')' if format_param in self._special_itags else '')
1490 'url': video_real_url,
1491 'uploader': video_uploader,
1492 'uploader_id': video_uploader_id,
1493 'upload_date': upload_date,
1494 'title': video_title,
1495 'ext': video_extension,
1496 'format': video_format,
1497 'thumbnail': video_thumbnail,
1498 'description': video_description,
1499 'player_url': player_url,
1500 'subtitles': video_subtitles,
1501 'duration': video_duration
# NOTE(review): this chunk appears to have source lines dropped inside this
# class (parts of the _VALID_URL pattern, _MAX_RESULTS, the @classmethod
# decorator on suitable, 'if mobj is None:' guards, a 'try:' opener and
# 'break' statements) — confirm against the full file before editing further.
class YoutubePlaylistIE(InfoExtractor):
    # Extracts every video of a YouTube playlist via the gdata v2 JSON API.
    IE_DESC = u'YouTube.com playlists'
    # Verbose-mode pattern: matches playlist-style URLs carrying a p=/a=/list=
    # query parameter, and also bare PL/EC/UU/FL playlist ids.
    _VALID_URL = r"""(?:
                     (?:course|view_play_list|my_playlists|artist|playlist|watch)
                     \? (?:.*?&)*? (?:p|a|list)=
                     ((?:PL|EC|UU|FL)?[0-9A-Za-z-_]{10,})
                     ((?:PL|EC|UU|FL)[0-9A-Za-z-_]{10,})
    # gdata endpoint: %s = playlist id, %i = page size, %i = 1-based start index.
    _TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/playlists/%s?max-results=%i&start-index=%i&v=2&alt=json&safeSearch=none'
    IE_NAME = u'youtube:playlist'

    def suitable(cls, url):
        """Receives a URL and returns True if suitable for this IE."""
        # _VALID_URL is written in verbose form, so re.VERBOSE is required here.
        return re.match(cls._VALID_URL, url, re.VERBOSE) is not None

    def _real_extract(self, url):
        # Extract playlist id
        mobj = re.match(self._VALID_URL, url, re.VERBOSE)
        raise ExtractorError(u'Invalid URL: %s' % url)
        # The id may be captured by either alternative of _VALID_URL.
        playlist_id = mobj.group(1) or mobj.group(2)

        # Check if it's a video-specific URL
        query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
        if 'v' in query_dict:
            video_id = query_dict['v'][0]
            if self._downloader.params.get('noplaylist'):
                self.to_screen(u'Downloading just video %s because of --no-playlist' % video_id)
                return self.url_result('https://www.youtube.com/watch?v=' + video_id, 'Youtube')
            self.to_screen(u'Downloading playlist PL%s - add --no-playlist to just download video %s' % (playlist_id, video_id))

        # Download playlist videos from API
        for page_num in itertools.count(1):
            start_index = self._MAX_RESULTS * (page_num - 1) + 1
            # presumably the gdata API refuses start indexes this large — TODO confirm
            if start_index >= 1000:
                self._downloader.report_warning(u'Max number of results reached')
            url = self._TEMPLATE_URL % (playlist_id, self._MAX_RESULTS, start_index)
            page = self._download_webpage(url, playlist_id, u'Downloading page #%s' % page_num)
            response = json.loads(page)
            except ValueError as err:
                raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))

            if 'feed' not in response:
                raise ExtractorError(u'Got a malformed response from YouTube API')
            playlist_title = response['feed']['title']['$t']
            if 'entry' not in response['feed']:
                # Number of videos is a multiple of self._MAX_RESULTS

            for entry in response['feed']['entry']:
                # yt$position is the video's 1-based position within the playlist.
                index = entry['yt$position']['$t']
                if 'media$group' in entry and 'yt$videoid' in entry['media$group']:
                        'https://www.youtube.com/watch?v=' + entry['media$group']['yt$videoid']['$t']

        # Sort by playlist position, then keep only the watch URLs.
        videos = [v[1] for v in sorted(videos)]

        url_results = [self.url_result(vurl, 'Youtube') for vurl in videos]
        return [self.playlist_result(url_results, playlist_id, playlist_title)]
# NOTE(review): this chunk appears to drop several source lines in this class
# ('ids_in_page = []' / 'return ids_in_page' in extract_videos_from_page, the
# 'if mobj is None:' guard, loop initialisation and 'break' statements) —
# confirm against the full file before editing further.
class YoutubeChannelIE(InfoExtractor):
    # Extracts all videos of a YouTube channel, oldest first (sort=da).
    IE_DESC = u'YouTube.com channels'
    _VALID_URL = r"^(?:https?://)?(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/([0-9A-Za-z_-]+)"
    # First page is fetched as a plain HTML listing.
    _TEMPLATE_URL = 'http://www.youtube.com/channel/%s/videos?sort=da&flow=list&view=0&page=%s&gl=US&hl=en'
    # Marker string whose presence in a page means more pages are available.
    _MORE_PAGES_INDICATOR = 'yt-uix-load-more'
    # Subsequent pages come from the JSON c4_browse_ajax endpoint.
    _MORE_PAGES_URL = 'http://www.youtube.com/c4_browse_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s'
    IE_NAME = u'youtube:channel'

    def extract_videos_from_page(self, page):
        # Collect the distinct video ids linked from a channel page, in order
        # of first appearance.
        for mobj in re.finditer(r'href="/watch\?v=([0-9A-Za-z_-]+)&?', page):
            if mobj.group(1) not in ids_in_page:
                ids_in_page.append(mobj.group(1))

    def _real_extract(self, url):
        # Extract channel id
        mobj = re.match(self._VALID_URL, url)
        raise ExtractorError(u'Invalid URL: %s' % url)

        # Download channel page
        channel_id = mobj.group(1)
        url = self._TEMPLATE_URL % (channel_id, pagenum)
        page = self._download_webpage(url, channel_id,
                                      u'Downloading page #%s' % pagenum)

        # Extract video identifiers
        ids_in_page = self.extract_videos_from_page(page)
        video_ids.extend(ids_in_page)

        # Download any subsequent channel pages using the json-based channel_ajax query
        if self._MORE_PAGES_INDICATOR in page:
            for pagenum in itertools.count(1):
                url = self._MORE_PAGES_URL % (pagenum, channel_id)
                page = self._download_webpage(url, channel_id,
                                              u'Downloading page #%s' % pagenum)
                page = json.loads(page)
                # The ajax response wraps the listing HTML in 'content_html'.
                ids_in_page = self.extract_videos_from_page(page['content_html'])
                video_ids.extend(ids_in_page)
                # 'load_more_widget_html' advertises whether another page exists.
                if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']:

        self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))

        urls = ['http://www.youtube.com/watch?v=%s' % id for id in video_ids]
        url_entries = [self.url_result(eurl, 'Youtube') for eurl in urls]
        return [self.playlist_result(url_entries, channel_id)]
# NOTE(review): this chunk appears to drop source lines in this class (the
# @classmethod decorator on suitable, the 'if mobj is None:' guard, the
# 'video_ids'/'ids_in_page' initialisations, a 'try:' opener and 'break'
# statements) — confirm against the full file before editing further.
class YoutubeUserIE(InfoExtractor):
    # Extracts all uploads of a user via the gdata API, one page at a time.
    IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?)|ytuser:)(?!feed/)([A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
    # Maximum result count the gdata uploads feed returns per request.
    _GDATA_PAGE_SIZE = 50
    _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'
    IE_NAME = u'youtube:user'

    def suitable(cls, url):
        # Don't return True if the url can be extracted with other youtube
        # extractor, the regex would is too permissive and it would match.
        other_ies = iter(klass for (name, klass) in globals().items() if name.endswith('IE') and klass is not cls)
        if any(ie.suitable(url) for ie in other_ies): return False
        else: return super(YoutubeUserIE, cls).suitable(url)

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        raise ExtractorError(u'Invalid URL: %s' % url)

        username = mobj.group(1)

        # Download video ids using YouTube Data API. Result size per
        # query is limited (currently to 50 videos) so we need to query
        # page by page until there are no video ids - it means we got
        for pagenum in itertools.count(0):
            # start-index is 1-based in the gdata API.
            start_index = pagenum * self._GDATA_PAGE_SIZE + 1

            gdata_url = self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index)
            page = self._download_webpage(gdata_url, username,
                                          u'Downloading video ids from %d to %d' % (start_index, start_index + self._GDATA_PAGE_SIZE))

            response = json.loads(page)
            except ValueError as err:
                raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))
            if 'entry' not in response['feed']:
                # Number of videos is a multiple of self._MAX_RESULTS

            # Extract video identifiers
            for entry in response['feed']['entry']:
                # The entry id ends in '/<video_id>'; keep the last path piece.
                ids_in_page.append(entry['id']['$t'].split('/')[-1])
            video_ids.extend(ids_in_page)

            # A little optimization - if current page is not
            # "full", ie. does not contain PAGE_SIZE video ids then
            # we can assume that this page is the last one - there
            # are no more ids on further pages - no need to query
            if len(ids_in_page) < self._GDATA_PAGE_SIZE:

        urls = ['http://www.youtube.com/watch?v=%s' % video_id for video_id in video_ids]
        url_results = [self.url_result(rurl, 'Youtube') for rurl in urls]
        return [self.playlist_result(url_results, playlist_title = username)]
# NOTE(review): this chunk appears to drop source lines in _get_n_results
# (the video_ids/pagenum/limit initialisation, a 'try:' opener and the page
# increment) — confirm against the full file before editing further.
class YoutubeSearchIE(SearchInfoExtractor):
    # Implements the "ytsearchN:query" pseudo-URL via the gdata jsonc API.
    IE_DESC = u'YouTube.com searches'
    # %s = URL-quoted query, %i = 1-based start index; fixed page size of 50.
    _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc'
    IE_NAME = u'youtube:search'
    _SEARCH_KEY = 'ytsearch'

    def report_download_page(self, query, pagenum):
        """Report attempt to download search page with given number."""
        self._downloader.to_screen(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        while (50 * pagenum) < limit:
            self.report_download_page(query, pagenum+1)
            result_url = self._API_URL % (compat_urllib_parse.quote_plus(query), (50*pagenum)+1)
            request = compat_urllib_request.Request(result_url)
            data = compat_urllib_request.urlopen(request).read().decode('utf-8')
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                raise ExtractorError(u'Unable to download API page: %s' % compat_str(err))
            api_response = json.loads(data)['data']

            if not 'items' in api_response:
                raise ExtractorError(u'[youtube] No video results')

            new_ids = list(video['id'] for video in api_response['items'])
            video_ids += new_ids

            # Never request more results than the API reports to exist.
            limit = min(n, api_response['totalItems'])

        # The last page may overshoot the requested count; trim to exactly n.
        if len(video_ids) > n:
            video_ids = video_ids[:n]
        videos = [self.url_result('http://www.youtube.com/watch?v=%s' % id, 'Youtube') for id in video_ids]
        return self.playlist_result(videos, query)
class YoutubeShowIE(InfoExtractor):
    """Extractor for YouTube show pages; yields one result per season playlist."""
    IE_DESC = u'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://www\.youtube\.com/show/(.*)'
    IE_NAME = u'youtube:show'

    def _real_extract(self, url):
        show_name = re.match(self._VALID_URL, url).group(1)
        webpage = self._download_webpage(url, show_name, u'Downloading show webpage')
        # Each season of the show is exposed on the page as a playlist link.
        season_paths = [m.group(1) for m in re.finditer(r'href="(/playlist\?list=.*?)"', webpage)]
        self.to_screen(u'%s: Found %s seasons' % (show_name, len(season_paths)))
        results = []
        for path in season_paths:
            results.append(self.url_result('https://www.youtube.com' + path, 'YoutubePlaylist'))
        return results
# NOTE(review): this chunk appears to drop source lines in this class (the
# property decorators and 'def IE_NAME' header belonging to the orphan
# 'return u"youtube:%s"' below, the _real_initialize body, _PAGING_STEP,
# 'feed_entries = []' and a 'break') — confirm against the full file.
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Base class for extractors that fetch info from
    http://www.youtube.com/feed_ajax
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
    """
    # Feeds are per-account, so credentials are mandatory.
    _LOGIN_REQUIRED = True

    # use action_load_personal_feed instead of action_load_system_feed
    _PERSONAL_FEED = False

    def _FEED_TEMPLATE(self):
        # Builds the per-page feed URL; the doubled '%%s' leaves a '%s' slot
        # that _real_extract fills in with the paging offset.
        action = 'action_load_system_feed'
        if self._PERSONAL_FEED:
            action = 'action_load_personal_feed'
        return 'http://www.youtube.com/feed_ajax?%s=1&feed_name=%s&paging=%%s' % (action, self._FEED_NAME)

        return u'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):

    def _real_extract(self, url):
        # The step argument is available only in 2.7 or higher
        for i in itertools.count(0):
            paging = i*self._PAGING_STEP
            info = self._download_webpage(self._FEED_TEMPLATE % paging,
                                          u'%s feed' % self._FEED_NAME,
                                          u'Downloading page %s' % i)
            info = json.loads(info)
            feed_html = info['feed_html']
            m_ids = re.finditer(r'"/watch\?v=(.*?)["&]', feed_html)
            # orderedSet keeps first occurrence while dropping duplicates.
            ids = orderedSet(m.group(1) for m in m_ids)
            feed_entries.extend(self.url_result(id, 'Youtube') for id in ids)
            # A null 'paging' value marks the last page of the feed.
            if info['paging'] is None:
        return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE)
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Extractor for the logged-in user's subscriptions feed."""
    IE_DESC = u'YouTube.com subscriptions feed, "ytsubs" keyword(requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
    # Feed name used by the base class to build the feed_ajax URL.
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = u'Youtube Subscriptions'
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Extractor for the logged-in user's recommended-videos feed."""
    IE_DESC = u'YouTube.com recommended videos, "ytrec" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?'
    # Feed name used by the base class to build the feed_ajax URL.
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = u'Youtube Recommended videos'
class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor):
    """Extractor for the logged-in user's watch-later list."""
    IE_DESC = u'Youtube watch later list, "ytwatchlater" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/watch_later|:ytwatchlater'
    # Feed name used by the base class to build the feed_ajax URL.
    _FEED_NAME = 'watch_later'
    _PLAYLIST_TITLE = u'Youtube Watch Later'

    # Watch-later is per-user, so the base class must hit the personal feed
    # action instead of the system feed one.
    _PERSONAL_FEED = True
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Extractor for the logged-in user's favourite videos.

    Delegates the actual extraction to the playlist extractor once the
    backing playlist id has been scraped from the favourites page.
    """
    IE_NAME = u'youtube:favorites'
    IE_DESC = u'YouTube.com favourite videos, "ytfav" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
    # Favourites are per-account, so credentials are mandatory.
    _LOGIN_REQUIRED = True

    def _real_extract(self, url):
        page = self._download_webpage('https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        # The page embeds the id of the playlist backing the favourites list;
        # hand that id over to YoutubePlaylistIE, which accepts bare ids.
        favourites_playlist = self._search_regex(r'list=(.+?)["&]', page, u'favourites playlist id')
        return self.url_result(favourites_playlist, 'YoutubePlaylist')