_ Git - youtube-dl/blob - youtube_dl/extractor/youtube.py

   1 # coding: utf-8
   2
   3 import collections
   4 import errno
   5 import io
   6 import itertools
   7 import json
   8 import os.path
   9 import re
  10 import socket
  11 import string
  12 import struct
  13 import traceback
  14 import xml.etree.ElementTree
  15 import zlib
  16
  17 from .common import InfoExtractor, SearchInfoExtractor
  18 from .subtitles import SubtitlesInfoExtractor
  19 from ..utils import (
  20     compat_chr,
  21     compat_http_client,
  22     compat_parse_qs,
  23     compat_urllib_error,
  24     compat_urllib_parse,
  25     compat_urllib_request,
  26     compat_str,
  27
  28     clean_html,
  29     get_element_by_id,
  30     ExtractorError,
  31     unescapeHTML,
  32     unified_strdate,
  33     orderedSet,
  34     write_json_file,
  35 )
  36
  37 class YoutubeBaseInfoExtractor(InfoExtractor):
  38     """Provide base functions for Youtube extractors"""
  39     _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
  40     _LANG_URL = r'https://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
  41     _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
  42     _NETRC_MACHINE = 'youtube'
  43     # If True it will raise an error if no login info is provided
  44     _LOGIN_REQUIRED = False
  45
  46     def report_lang(self):
  47         """Report attempt to set language."""
  48         self.to_screen(u'Setting language')
  49
  50     def _set_language(self):
  51         request = compat_urllib_request.Request(self._LANG_URL)
  52         try:
  53             self.report_lang()
  54             compat_urllib_request.urlopen(request).read()
  55         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
  56             self._downloader.report_warning(u'unable to set language: %s' % compat_str(err))
  57             return False
  58         return True
  59
  60     def _login(self):
  61         (username, password) = self._get_login_info()
  62         # No authentication to be performed
  63         if username is None:
  64             if self._LOGIN_REQUIRED:
  65                 raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True)
  66             return False
  67
  68         request = compat_urllib_request.Request(self._LOGIN_URL)
  69         try:
  70             login_page = compat_urllib_request.urlopen(request).read().decode('utf-8')
  71         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
  72             self._downloader.report_warning(u'unable to fetch login page: %s' % compat_str(err))
  73             return False
  74
  75         galx = None
  76         dsh = None
  77         match = re.search(re.compile(r'<input.+?name="GALX".+?value="(.+?)"', re.DOTALL), login_page)
  78         if match:
  79           galx = match.group(1)
  80         match = re.search(re.compile(r'<input.+?name="dsh".+?value="(.+?)"', re.DOTALL), login_page)
  81         if match:
  82           dsh = match.group(1)
  83
  84         # Log in
  85         login_form_strs = {
  86                 u'continue': u'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
  87                 u'Email': username,
  88                 u'GALX': galx,
  89                 u'Passwd': password,
  90                 u'PersistentCookie': u'yes',
  91                 u'_utf8': u'霱',
  92                 u'bgresponse': u'js_disabled',
  93                 u'checkConnection': u'',
  94                 u'checkedDomains': u'youtube',
  95                 u'dnConn': u'',
  96                 u'dsh': dsh,
  97                 u'pstMsg': u'0',
  98                 u'rmShown': u'1',
  99                 u'secTok': u'',
 100                 u'signIn': u'Sign in',
 101                 u'timeStmp': u'',
 102                 u'service': u'youtube',
 103                 u'uilel': u'3',
 104                 u'hl': u'en_US',
 105         }
 106         # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
 107         # chokes on unicode
 108         login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k,v in login_form_strs.items())
 109         login_data = compat_urllib_parse.urlencode(login_form).encode('ascii')
 110         request = compat_urllib_request.Request(self._LOGIN_URL, login_data)
 111         try:
 112             self.report_login()
 113             login_results = compat_urllib_request.urlopen(request).read().decode('utf-8')
 114             if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
 115                 self._downloader.report_warning(u'unable to log in: bad username or password')
 116                 return False
 117         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
 118             self._downloader.report_warning(u'unable to log in: %s' % compat_str(err))
 119             return False
 120         return True
 121
 122     def _confirm_age(self):
 123         age_form = {
 124                 'next_url':     '/',
 125                 'action_confirm':   'Confirm',
 126                 }
 127         request = compat_urllib_request.Request(self._AGE_URL, compat_urllib_parse.urlencode(age_form))
 128         try:
 129             self.report_age_confirmation()
 130             compat_urllib_request.urlopen(request).read().decode('utf-8')
 131         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
 132             raise ExtractorError(u'Unable to confirm age: %s' % compat_str(err))
 133         return True
 134
 135     def _real_initialize(self):
 136         if self._downloader is None:
 137             return
 138         if not self._set_language():
 139             return
 140         if not self._login():
 141             return
 142         self._confirm_age()
 143
 144
 145 class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
 146     IE_DESC = u'YouTube.com'
 147     _VALID_URL = r"""^
 148                      (
 149                          (?:https?://)?                                       # http(s):// (optional)
 150                          (?:(?:(?:(?:\w+\.)?youtube(?:-nocookie)?\.com/|
 151                             tube\.majestyc\.net/|
 152                             youtube\.googleapis\.com/)                        # the various hostnames, with wildcard subdomains
 153                          (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
 154                          (?:                                                  # the various things that can precede the ID:
 155                              (?:(?:v|embed|e)/)                               # v/ or embed/ or e/
 156                              |(?:                                             # or the v= param in all its forms
 157                                  (?:(?:watch|movie)(?:_popup)?(?:\.php)?)?    # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
 158                                  (?:\?|\#!?)                                  # the params delimiter ? or # or #!
 159                                  (?:.*?&)?                                    # any other preceding param (like /?s=tuff&v=xxxx)
 160                                  v=
 161                              )
 162                          ))
 163                          |youtu\.be/                                          # just youtu.be/xxxx
 164                          )
 165                      )?                                                       # all until now is optional -> you can pass the naked ID
 166                      ([0-9A-Za-z_-]{11})                                      # here is it! the YouTube video ID
 167                      (?(1).+)?                                                # if we found the ID, everything can follow
 168                      $"""
 169     _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
 170     # Listed in order of quality
 171     _available_formats = ['38', '37', '46', '22', '45', '35', '44', '34', '18', '43', '6', '5', '36', '17', '13',
 172                           # Apple HTTP Live Streaming
 173                           '96', '95', '94', '93', '92', '132', '151',
 174                           # 3D
 175                           '85', '84', '102', '83', '101', '82', '100',
 176                           # Dash video
 177                           '138', '137', '248', '136', '247', '135', '246',
 178                           '245', '244', '134', '243', '133', '242', '160',
 179                           # Dash audio
 180                           '141', '172', '140', '171', '139',
 181                           ]
 182     _available_formats_prefer_free = ['38', '46', '37', '45', '22', '44', '35', '43', '34', '18', '6', '5', '36', '17', '13',
 183                                       # Apple HTTP Live Streaming
 184                                       '96', '95', '94', '93', '92', '132', '151',
 185                                       # 3D
 186                                       '85', '102', '84', '101', '83', '100', '82',
 187                                       # Dash video
 188                                       '138', '248', '137', '247', '136', '246', '245',
 189                                       '244', '135', '243', '134', '242', '133', '160',
 190                                       # Dash audio
 191                                       '172', '141', '171', '140', '139',
 192                                       ]
 193     _video_formats_map = {
 194         'flv': ['35', '34', '6', '5'],
 195         '3gp': ['36', '17', '13'],
 196         'mp4': ['38', '37', '22', '18'],
 197         'webm': ['46', '45', '44', '43'],
 198     }
 199     _video_extensions = {
 200         '13': '3gp',
 201         '17': '3gp',
 202         '18': 'mp4',
 203         '22': 'mp4',
 204         '36': '3gp',
 205         '37': 'mp4',
 206         '38': 'mp4',
 207         '43': 'webm',
 208         '44': 'webm',
 209         '45': 'webm',
 210         '46': 'webm',
 211
 212         # 3d videos
 213         '82': 'mp4',
 214         '83': 'mp4',
 215         '84': 'mp4',
 216         '85': 'mp4',
 217         '100': 'webm',
 218         '101': 'webm',
 219         '102': 'webm',
 220
 221         # Apple HTTP Live Streaming
 222         '92': 'mp4',
 223         '93': 'mp4',
 224         '94': 'mp4',
 225         '95': 'mp4',
 226         '96': 'mp4',
 227         '132': 'mp4',
 228         '151': 'mp4',
 229
 230         # Dash mp4
 231         '133': 'mp4',
 232         '134': 'mp4',
 233         '135': 'mp4',
 234         '136': 'mp4',
 235         '137': 'mp4',
 236         '138': 'mp4',
 237         '139': 'mp4',
 238         '140': 'mp4',
 239         '141': 'mp4',
 240         '160': 'mp4',
 241
 242         # Dash webm
 243         '171': 'webm',
 244         '172': 'webm',
 245         '242': 'webm',
 246         '243': 'webm',
 247         '244': 'webm',
 248         '245': 'webm',
 249         '246': 'webm',
 250         '247': 'webm',
 251         '248': 'webm',
 252     }
 253     _video_dimensions = {
 254         '5': '240x400',
 255         '6': '???',
 256         '13': '???',
 257         '17': '144x176',
 258         '18': '360x640',
 259         '22': '720x1280',
 260         '34': '360x640',
 261         '35': '480x854',
 262         '36': '240x320',
 263         '37': '1080x1920',
 264         '38': '3072x4096',
 265         '43': '360x640',
 266         '44': '480x854',
 267         '45': '720x1280',
 268         '46': '1080x1920',
 269         '82': '360p',
 270         '83': '480p',
 271         '84': '720p',
 272         '85': '1080p',
 273         '92': '240p',
 274         '93': '360p',
 275         '94': '480p',
 276         '95': '720p',
 277         '96': '1080p',
 278         '100': '360p',
 279         '101': '480p',
 280         '102': '720p',
 281         '132': '240p',
 282         '151': '72p',
 283         '133': '240p',
 284         '134': '360p',
 285         '135': '480p',
 286         '136': '720p',
 287         '137': '1080p',
 288         '138': '>1080p',
 289         '139': '48k',
 290         '140': '128k',
 291         '141': '256k',
 292         '160': '192p',
 293         '171': '128k',
 294         '172': '256k',
 295         '242': '240p',
 296         '243': '360p',
 297         '244': '480p',
 298         '245': '480p',
 299         '246': '480p',
 300         '247': '720p',
 301         '248': '1080p',
 302     }
 303     _special_itags = {
 304         '82': '3D',
 305         '83': '3D',
 306         '84': '3D',
 307         '85': '3D',
 308         '100': '3D',
 309         '101': '3D',
 310         '102': '3D',
 311         '133': 'DASH Video',
 312         '134': 'DASH Video',
 313         '135': 'DASH Video',
 314         '136': 'DASH Video',
 315         '137': 'DASH Video',
 316         '138': 'DASH Video',
 317         '139': 'DASH Audio',
 318         '140': 'DASH Audio',
 319         '141': 'DASH Audio',
 320         '160': 'DASH Video',
 321         '171': 'DASH Audio',
 322         '172': 'DASH Audio',
 323         '242': 'DASH Video',
 324         '243': 'DASH Video',
 325         '244': 'DASH Video',
 326         '245': 'DASH Video',
 327         '246': 'DASH Video',
 328         '247': 'DASH Video',
 329         '248': 'DASH Video',
 330     }
 331
 332     IE_NAME = u'youtube'
 333     _TESTS = [
 334         {
 335             u"url":  u"http://www.youtube.com/watch?v=BaW_jenozKc",
 336             u"file":  u"BaW_jenozKc.mp4",
 337             u"info_dict": {
 338                 u"title": u"youtube-dl test video \"'/\\ä↭𝕐",
 339                 u"uploader": u"Philipp Hagemeister",
 340                 u"uploader_id": u"phihag",
 341                 u"upload_date": u"20121002",
 342                 u"description": u"test chars:  \"'/\\ä↭𝕐\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de ."
 343             }
 344         },
 345         {
 346             u"url":  u"http://www.youtube.com/watch?v=1ltcDfZMA3U",
 347             u"file":  u"1ltcDfZMA3U.flv",
 348             u"note": u"Test VEVO video (#897)",
 349             u"info_dict": {
 350                 u"upload_date": u"20070518",
 351                 u"title": u"Maps - It Will Find You",
 352                 u"description": u"Music video by Maps performing It Will Find You.",
 353                 u"uploader": u"MuteUSA",
 354                 u"uploader_id": u"MuteUSA"
 355             }
 356         },
 357         {
 358             u"url":  u"http://www.youtube.com/watch?v=UxxajLWwzqY",
 359             u"file":  u"UxxajLWwzqY.mp4",
 360             u"note": u"Test generic use_cipher_signature video (#897)",
 361             u"info_dict": {
 362                 u"upload_date": u"20120506",
 363                 u"title": u"Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]",
 364                 u"description": u"md5:3e2666e0a55044490499ea45fe9037b7",
 365                 u"uploader": u"Icona Pop",
 366                 u"uploader_id": u"IconaPop"
 367             }
 368         },
 369         {
 370             u"url":  u"https://www.youtube.com/watch?v=07FYdnEawAQ",
 371             u"file":  u"07FYdnEawAQ.mp4",
 372             u"note": u"Test VEVO video with age protection (#956)",
 373             u"info_dict": {
 374                 u"upload_date": u"20130703",
 375                 u"title": u"Justin Timberlake - Tunnel Vision (Explicit)",
 376                 u"description": u"md5:64249768eec3bc4276236606ea996373",
 377                 u"uploader": u"justintimberlakeVEVO",
 378                 u"uploader_id": u"justintimberlakeVEVO"
 379             }
 380         },
 381         {
 382             u'url': u'https://www.youtube.com/watch?v=TGi3HqYrWHE',
 383             u'file': u'TGi3HqYrWHE.mp4',
 384             u'note': u'm3u8 video',
 385             u'info_dict': {
 386                 u'title': u'Triathlon - Men - London 2012 Olympic Games',
 387                 u'description': u'- Men -  TR02 - Triathlon - 07 August 2012 - London 2012 Olympic Games',
 388                 u'uploader': u'olympic',
 389                 u'upload_date': u'20120807',
 390                 u'uploader_id': u'olympic',
 391             },
 392             u'params': {
 393                 u'skip_download': True,
 394             },
 395         },
 396     ]
 397
 398
 399     @classmethod
 400     def suitable(cls, url):
 401         """Receives a URL and returns True if suitable for this IE."""
 402         if YoutubePlaylistIE.suitable(url): return False
 403         return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
 404
 405     def __init__(self, *args, **kwargs):
 406         super(YoutubeIE, self).__init__(*args, **kwargs)
 407         self._player_cache = {}
 408
 409     def report_video_webpage_download(self, video_id):
 410         """Report attempt to download video webpage."""
 411         self.to_screen(u'%s: Downloading video webpage' % video_id)
 412
 413     def report_video_info_webpage_download(self, video_id):
 414         """Report attempt to download video info webpage."""
 415         self.to_screen(u'%s: Downloading video info webpage' % video_id)
 416
 417     def report_information_extraction(self, video_id):
 418         """Report attempt to extract video information."""
 419         self.to_screen(u'%s: Extracting video information' % video_id)
 420
 421     def report_unavailable_format(self, video_id, format):
 422         """Report extracted video URL."""
 423         self.to_screen(u'%s: Format %s not available' % (video_id, format))
 424
 425     def report_rtmp_download(self):
 426         """Indicate the download will use the RTMP protocol."""
 427         self.to_screen(u'RTMP download detected')
 428
 429     def _extract_signature_function(self, video_id, player_url, slen):
 430         id_m = re.match(r'.*-(?P<id>[a-zA-Z0-9_-]+)\.(?P<ext>[a-z]+)$',
 431                         player_url)
 432         player_type = id_m.group('ext')
 433         player_id = id_m.group('id')
 434
 435         # Read from filesystem cache
 436         func_id = '%s_%s_%d' % (player_type, player_id, slen)
 437         assert os.path.basename(func_id) == func_id
 438         cache_dir = self._downloader.params.get('cachedir',
 439                                                 u'~/.youtube-dl/cache')
 440
 441         if cache_dir != u'NONE':
 442             cache_fn = os.path.join(os.path.expanduser(cache_dir),
 443                                     u'youtube-sigfuncs',
 444                                     func_id + '.json')
 445             try:
 446                 with io.open(cache_fn, 'r', encoding='utf-8') as cachef:
 447                     cache_spec = json.load(cachef)
 448                 return lambda s: u''.join(s[i] for i in cache_spec)
 449             except IOError:
 450                 pass  # No cache available
 451
 452         if player_type == 'js':
 453             code = self._download_webpage(
 454                 player_url, video_id,
 455                 note=u'Downloading %s player %s' % (player_type, player_id),
 456                 errnote=u'Download of %s failed' % player_url)
 457             res = self._parse_sig_js(code)
 458         elif player_type == 'swf':
 459             urlh = self._request_webpage(
 460                 player_url, video_id,
 461                 note=u'Downloading %s player %s' % (player_type, player_id),
 462                 errnote=u'Download of %s failed' % player_url)
 463             code = urlh.read()
 464             res = self._parse_sig_swf(code)
 465         else:
 466             assert False, 'Invalid player type %r' % player_type
 467
 468         if cache_dir is not False:
 469             try:
 470                 cache_res = res(map(compat_chr, range(slen)))
 471                 cache_spec = [ord(c) for c in cache_res]
 472                 try:
 473                     os.makedirs(os.path.dirname(cache_fn))
 474                 except OSError as ose:
 475                     if ose.errno != errno.EEXIST:
 476                         raise
 477                 write_json_file(cache_spec, cache_fn)
 478             except Exception:
 479                 tb = traceback.format_exc()
 480                 self._downloader.report_warning(
 481                     u'Writing cache to %r failed: %s' % (cache_fn, tb))
 482
 483         return res
 484
 485     def _print_sig_code(self, func, slen):
 486         def gen_sig_code(idxs):
 487             def _genslice(start, end, step):
 488                 starts = u'' if start == 0 else str(start)
 489                 ends = u':%d' % (end+step)
 490                 steps = u'' if step == 1 else (':%d' % step)
 491                 return u's[%s%s%s]' % (starts, ends, steps)
 492
 493             step = None
 494             start = '(Never used)'  # Quelch pyflakes warnings - start will be
 495                                     # set as soon as step is set
 496             for i, prev in zip(idxs[1:], idxs[:-1]):
 497                 if step is not None:
 498                     if i - prev == step:
 499                         continue
 500                     yield _genslice(start, prev, step)
 501                     step = None
 502                     continue
 503                 if i - prev in [-1, 1]:
 504                     step = i - prev
 505                     start = prev
 506                     continue
 507                 else:
 508                     yield u's[%d]' % prev
 509             if step is None:
 510                 yield u's[%d]' % i
 511             else:
 512                 yield _genslice(start, i, step)
 513
 514         cache_res = func(map(compat_chr, range(slen)))
 515         cache_spec = [ord(c) for c in cache_res]
 516         expr_code = u' + '.join(gen_sig_code(cache_spec))
 517         code = u'if len(s) == %d:\n    return %s\n' % (slen, expr_code)
 518         self.to_screen(u'Extracted signature:\n' + code)
 519
 520     def _parse_sig_js(self, jscode):
 521         funcname = self._search_regex(
 522             r'signature=([a-zA-Z]+)', jscode,
 523             u'Initial JS player signature function name')
 524
 525         functions = {}
 526
 527         def argidx(varname):
 528             return string.lowercase.index(varname)
 529
 530         def interpret_statement(stmt, local_vars, allow_recursion=20):
 531             if allow_recursion < 0:
 532                 raise ExtractorError(u'Recursion limit reached')
 533
 534             if stmt.startswith(u'var '):
 535                 stmt = stmt[len(u'var '):]
 536             ass_m = re.match(r'^(?P<out>[a-z]+)(?:\[(?P<index>[^\]]+)\])?' +
 537                              r'=(?P<expr>.*)$', stmt)
 538             if ass_m:
 539                 if ass_m.groupdict().get('index'):
 540                     def assign(val):
 541                         lvar = local_vars[ass_m.group('out')]
 542                         idx = interpret_expression(ass_m.group('index'),
 543                                                    local_vars, allow_recursion)
 544                         assert isinstance(idx, int)
 545                         lvar[idx] = val
 546                         return val
 547                     expr = ass_m.group('expr')
 548                 else:
 549                     def assign(val):
 550                         local_vars[ass_m.group('out')] = val
 551                         return val
 552                     expr = ass_m.group('expr')
 553             elif stmt.startswith(u'return '):
 554                 assign = lambda v: v
 555                 expr = stmt[len(u'return '):]
 556             else:
 557                 raise ExtractorError(
 558                     u'Cannot determine left side of statement in %r' % stmt)
 559
 560             v = interpret_expression(expr, local_vars, allow_recursion)
 561             return assign(v)
 562
 563         def interpret_expression(expr, local_vars, allow_recursion):
 564             if expr.isdigit():
 565                 return int(expr)
 566
 567             if expr.isalpha():
 568                 return local_vars[expr]
 569
 570             m = re.match(r'^(?P<in>[a-z]+)\.(?P<member>.*)$', expr)
 571             if m:
 572                 member = m.group('member')
 573                 val = local_vars[m.group('in')]
 574                 if member == 'split("")':
 575                     return list(val)
 576                 if member == 'join("")':
 577                     return u''.join(val)
 578                 if member == 'length':
 579                     return len(val)
 580                 if member == 'reverse()':
 581                     return val[::-1]
 582                 slice_m = re.match(r'slice\((?P<idx>.*)\)', member)
 583                 if slice_m:
 584                     idx = interpret_expression(
 585                         slice_m.group('idx'), local_vars, allow_recursion-1)
 586                     return val[idx:]
 587
 588             m = re.match(
 589                 r'^(?P<in>[a-z]+)\[(?P<idx>.+)\]$', expr)
 590             if m:
 591                 val = local_vars[m.group('in')]
 592                 idx = interpret_expression(m.group('idx'), local_vars,
 593                                            allow_recursion-1)
 594                 return val[idx]
 595
 596             m = re.match(r'^(?P<a>.+?)(?P<op>[%])(?P<b>.+?)$', expr)
 597             if m:
 598                 a = interpret_expression(m.group('a'),
 599                                          local_vars, allow_recursion)
 600                 b = interpret_expression(m.group('b'),
 601                                          local_vars, allow_recursion)
 602                 return a % b
 603
 604             m = re.match(
 605                 r'^(?P<func>[a-zA-Z]+)\((?P<args>[a-z0-9,]+)\)$', expr)
 606             if m:
 607                 fname = m.group('func')
 608                 if fname not in functions:
 609                     functions[fname] = extract_function(fname)
 610                 argvals = [int(v) if v.isdigit() else local_vars[v]
 611                            for v in m.group('args').split(',')]
 612                 return functions[fname](argvals)
 613             raise ExtractorError(u'Unsupported JS expression %r' % expr)
 614
 615         def extract_function(funcname):
 616             func_m = re.search(
 617                 r'function ' + re.escape(funcname) +
 618                 r'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}',
 619                 jscode)
 620             argnames = func_m.group('args').split(',')
 621
 622             def resf(args):
 623                 local_vars = dict(zip(argnames, args))
 624                 for stmt in func_m.group('code').split(';'):
 625                     res = interpret_statement(stmt, local_vars)
 626                 return res
 627             return resf
 628
 629         initial_function = extract_function(funcname)
 630         return lambda s: initial_function([s])
 631
 632     def _parse_sig_swf(self, file_contents):
 633         if file_contents[1:3] != b'WS':
 634             raise ExtractorError(
 635                 u'Not an SWF file; header is %r' % file_contents[:3])
 636         if file_contents[:1] == b'C':
 637             content = zlib.decompress(file_contents[8:])
 638         else:
 639             raise NotImplementedError(u'Unsupported compression format %r' %
 640                                       file_contents[:1])
 641
 642         def extract_tags(content):
 643             pos = 0
 644             while pos < len(content):
 645                 header16 = struct.unpack('<H', content[pos:pos+2])[0]
 646                 pos += 2
 647                 tag_code = header16 >> 6
 648                 tag_len = header16 & 0x3f
 649                 if tag_len == 0x3f:
 650                     tag_len = struct.unpack('<I', content[pos:pos+4])[0]
 651                     pos += 4
 652                 assert pos+tag_len <= len(content)
 653                 yield (tag_code, content[pos:pos+tag_len])
 654                 pos += tag_len
 655
 656         code_tag = next(tag
 657                         for tag_code, tag in extract_tags(content)
 658                         if tag_code == 82)
 659         p = code_tag.index(b'\0', 4) + 1
 660         code_reader = io.BytesIO(code_tag[p:])
 661
 662         # Parse ABC (AVM2 ByteCode)
 663         def read_int(reader=None):
 664             if reader is None:
 665                 reader = code_reader
 666             res = 0
 667             shift = 0
 668             for _ in range(5):
 669                 buf = reader.read(1)
 670                 assert len(buf) == 1
 671                 b = struct.unpack('<B', buf)[0]
 672                 res = res | ((b & 0x7f) << shift)
 673                 if b & 0x80 == 0:
 674                     break
 675                 shift += 7
 676             return res
 677
 678         def u30(reader=None):
 679             res = read_int(reader)
 680             assert res & 0xf0000000 == 0
 681             return res
 682         u32 = read_int
 683
 684         def s32(reader=None):
 685             v = read_int(reader)
 686             if v & 0x80000000 != 0:
 687                 v = - ((v ^ 0xffffffff) + 1)
 688             return v
 689
 690         def read_string(reader=None):
 691             if reader is None:
 692                 reader = code_reader
 693             slen = u30(reader)
 694             resb = reader.read(slen)
 695             assert len(resb) == slen
 696             return resb.decode('utf-8')
 697
 698         def read_bytes(count, reader=None):
 699             if reader is None:
 700                 reader = code_reader
 701             resb = reader.read(count)
 702             assert len(resb) == count
 703             return resb
 704
 705         def read_byte(reader=None):
 706             resb = read_bytes(1, reader=reader)
 707             res = struct.unpack('<B', resb)[0]
 708             return res
 709
 710         # minor_version + major_version
 711         read_bytes(2 + 2)
 712
 713         # Constant pool
 714         int_count = u30()
 715         for _c in range(1, int_count):
 716             s32()
 717         uint_count = u30()
 718         for _c in range(1, uint_count):
 719             u32()
 720         double_count = u30()
 721         read_bytes((double_count-1) * 8)
 722         string_count = u30()
 723         constant_strings = [u'']
 724         for _c in range(1, string_count):
 725             s = read_string()
 726             constant_strings.append(s)
 727         namespace_count = u30()
 728         for _c in range(1, namespace_count):
 729             read_bytes(1)  # kind
 730             u30()  # name
 731         ns_set_count = u30()
 732         for _c in range(1, ns_set_count):
 733             count = u30()
 734             for _c2 in range(count):
 735                 u30()
 736         multiname_count = u30()
 737         MULTINAME_SIZES = {
 738             0x07: 2,  # QName
 739             0x0d: 2,  # QNameA
 740             0x0f: 1,  # RTQName
 741             0x10: 1,  # RTQNameA
 742             0x11: 0,  # RTQNameL
 743             0x12: 0,  # RTQNameLA
 744             0x09: 2,  # Multiname
 745             0x0e: 2,  # MultinameA
 746             0x1b: 1,  # MultinameL
 747             0x1c: 1,  # MultinameLA
 748         }
 749         multinames = [u'']
 750         for _c in range(1, multiname_count):
 751             kind = u30()
 752             assert kind in MULTINAME_SIZES, u'Invalid multiname kind %r' % kind
 753             if kind == 0x07:
 754                 u30()  # namespace_idx
 755                 name_idx = u30()
 756                 multinames.append(constant_strings[name_idx])
 757             else:
 758                 multinames.append('[MULTINAME kind: %d]' % kind)
 759                 for _c2 in range(MULTINAME_SIZES[kind]):
 760                     u30()
 761
 762         # Methods
 763         method_count = u30()
 764         MethodInfo = collections.namedtuple(
 765             'MethodInfo',
 766             ['NEED_ARGUMENTS', 'NEED_REST'])
 767         method_infos = []
 768         for method_id in range(method_count):
 769             param_count = u30()
 770             u30()  # return type
 771             for _ in range(param_count):
 772                 u30()  # param type
 773             u30()  # name index (always 0 for youtube)
 774             flags = read_byte()
 775             if flags & 0x08 != 0:
 776                 # Options present
 777                 option_count = u30()
 778                 for c in range(option_count):
 779                     u30()  # val
 780                     read_bytes(1)  # kind
 781             if flags & 0x80 != 0:
 782                 # Param names present
 783                 for _ in range(param_count):
 784                     u30()  # param name
 785             mi = MethodInfo(flags & 0x01 != 0, flags & 0x04 != 0)
 786             method_infos.append(mi)
 787
 788         # Metadata
 789         metadata_count = u30()
 790         for _c in range(metadata_count):
 791             u30()  # name
 792             item_count = u30()
 793             for _c2 in range(item_count):
 794                 u30()  # key
 795                 u30()  # value
 796
 797         def parse_traits_info():
 798             trait_name_idx = u30()
 799             kind_full = read_byte()
 800             kind = kind_full & 0x0f
 801             attrs = kind_full >> 4
 802             methods = {}
 803             if kind in [0x00, 0x06]:  # Slot or Const
 804                 u30()  # Slot id
 805                 u30()  # type_name_idx
 806                 vindex = u30()
 807                 if vindex != 0:
 808                     read_byte()  # vkind
 809             elif kind in [0x01, 0x02, 0x03]:  # Method / Getter / Setter
 810                 u30()  # disp_id
 811                 method_idx = u30()
 812                 methods[multinames[trait_name_idx]] = method_idx
 813             elif kind == 0x04:  # Class
 814                 u30()  # slot_id
 815                 u30()  # classi
 816             elif kind == 0x05:  # Function
 817                 u30()  # slot_id
 818                 function_idx = u30()
 819                 methods[function_idx] = multinames[trait_name_idx]
 820             else:
 821                 raise ExtractorError(u'Unsupported trait kind %d' % kind)
 822
 823             if attrs & 0x4 != 0:  # Metadata present
 824                 metadata_count = u30()
 825                 for _c3 in range(metadata_count):
 826                     u30()  # metadata index
 827
 828             return methods
 829
 830         # Classes
 831         TARGET_CLASSNAME = u'SignatureDecipher'
 832         searched_idx = multinames.index(TARGET_CLASSNAME)
 833         searched_class_id = None
 834         class_count = u30()
 835         for class_id in range(class_count):
 836             name_idx = u30()
 837             if name_idx == searched_idx:
 838                 # We found the class we're looking for!
 839                 searched_class_id = class_id
 840             u30()  # super_name idx
 841             flags = read_byte()
 842             if flags & 0x08 != 0:  # Protected namespace is present
 843                 u30()  # protected_ns_idx
 844             intrf_count = u30()
 845             for _c2 in range(intrf_count):
 846                 u30()
 847             u30()  # iinit
 848             trait_count = u30()
 849             for _c2 in range(trait_count):
 850                 parse_traits_info()
 851
 852         if searched_class_id is None:
 853             raise ExtractorError(u'Target class %r not found' %
 854                                  TARGET_CLASSNAME)
 855
 856         method_names = {}
 857         method_idxs = {}
 858         for class_id in range(class_count):
 859             u30()  # cinit
 860             trait_count = u30()
 861             for _c2 in range(trait_count):
 862                 trait_methods = parse_traits_info()
 863                 if class_id == searched_class_id:
 864                     method_names.update(trait_methods.items())
 865                     method_idxs.update(dict(
 866                         (idx, name)
 867                         for name, idx in trait_methods.items()))
 868
 869         # Scripts
 870         script_count = u30()
 871         for _c in range(script_count):
 872             u30()  # init
 873             trait_count = u30()
 874             for _c2 in range(trait_count):
 875                 parse_traits_info()
 876
 877         # Method bodies
 878         method_body_count = u30()
 879         Method = collections.namedtuple('Method', ['code', 'local_count'])
 880         methods = {}
 881         for _c in range(method_body_count):
 882             method_idx = u30()
 883             u30()  # max_stack
 884             local_count = u30()
 885             u30()  # init_scope_depth
 886             u30()  # max_scope_depth
 887             code_length = u30()
 888             code = read_bytes(code_length)
 889             if method_idx in method_idxs:
 890                 m = Method(code, local_count)
 891                 methods[method_idxs[method_idx]] = m
 892             exception_count = u30()
 893             for _c2 in range(exception_count):
 894                 u30()  # from
 895                 u30()  # to
 896                 u30()  # target
 897                 u30()  # exc_type
 898                 u30()  # var_name
 899             trait_count = u30()
 900             for _c2 in range(trait_count):
 901                 parse_traits_info()
 902
 903         assert p + code_reader.tell() == len(code_tag)
 904         assert len(methods) == len(method_idxs)
 905
 906         method_pyfunctions = {}
 907
 908         def extract_function(func_name):
 909             if func_name in method_pyfunctions:
 910                 return method_pyfunctions[func_name]
 911             if func_name not in methods:
 912                 raise ExtractorError(u'Cannot find function %r' % func_name)
 913             m = methods[func_name]
 914
 915             def resfunc(args):
 916                 registers = ['(this)'] + list(args) + [None] * m.local_count
 917                 stack = []
 918                 coder = io.BytesIO(m.code)
 919                 while True:
 920                     opcode = struct.unpack('!B', coder.read(1))[0]
 921                     if opcode == 36:  # pushbyte
 922                         v = struct.unpack('!B', coder.read(1))[0]
 923                         stack.append(v)
 924                     elif opcode == 44:  # pushstring
 925                         idx = u30(coder)
 926                         stack.append(constant_strings[idx])
 927                     elif opcode == 48:  # pushscope
 928                         # We don't implement the scope register, so we'll just
 929                         # ignore the popped value
 930                         stack.pop()
 931                     elif opcode == 70:  # callproperty
 932                         index = u30(coder)
 933                         mname = multinames[index]
 934                         arg_count = u30(coder)
 935                         args = list(reversed(
 936                             [stack.pop() for _ in range(arg_count)]))
 937                         obj = stack.pop()
 938                         if mname == u'split':
 939                             assert len(args) == 1
 940                             assert isinstance(args[0], compat_str)
 941                             assert isinstance(obj, compat_str)
 942                             if args[0] == u'':
 943                                 res = list(obj)
 944                             else:
 945                                 res = obj.split(args[0])
 946                             stack.append(res)
 947                         elif mname == u'slice':
 948                             assert len(args) == 1
 949                             assert isinstance(args[0], int)
 950                             assert isinstance(obj, list)
 951                             res = obj[args[0]:]
 952                             stack.append(res)
 953                         elif mname == u'join':
 954                             assert len(args) == 1
 955                             assert isinstance(args[0], compat_str)
 956                             assert isinstance(obj, list)
 957                             res = args[0].join(obj)
 958                             stack.append(res)
 959                         elif mname in method_pyfunctions:
 960                             stack.append(method_pyfunctions[mname](args))
 961                         else:
 962                             raise NotImplementedError(
 963                                 u'Unsupported property %r on %r'
 964                                 % (mname, obj))
 965                     elif opcode == 72:  # returnvalue
 966                         res = stack.pop()
 967                         return res
 968                     elif opcode == 79:  # callpropvoid
 969                         index = u30(coder)
 970                         mname = multinames[index]
 971                         arg_count = u30(coder)
 972                         args = list(reversed(
 973                             [stack.pop() for _ in range(arg_count)]))
 974                         obj = stack.pop()
 975                         if mname == u'reverse':
 976                             assert isinstance(obj, list)
 977                             obj.reverse()
 978                         else:
 979                             raise NotImplementedError(
 980                                 u'Unsupported (void) property %r on %r'
 981                                 % (mname, obj))
 982                     elif opcode == 93:  # findpropstrict
 983                         index = u30(coder)
 984                         mname = multinames[index]
 985                         res = extract_function(mname)
 986                         stack.append(res)
 987                     elif opcode == 97:  # setproperty
 988                         index = u30(coder)
 989                         value = stack.pop()
 990                         idx = stack.pop()
 991                         obj = stack.pop()
 992                         assert isinstance(obj, list)
 993                         assert isinstance(idx, int)
 994                         obj[idx] = value
 995                     elif opcode == 98:  # getlocal
 996                         index = u30(coder)
 997                         stack.append(registers[index])
 998                     elif opcode == 99:  # setlocal
 999                         index = u30(coder)
1000                         value = stack.pop()
1001                         registers[index] = value
1002                     elif opcode == 102:  # getproperty
1003                         index = u30(coder)
1004                         pname = multinames[index]
1005                         if pname == u'length':
1006                             obj = stack.pop()
1007                             assert isinstance(obj, list)
1008                             stack.append(len(obj))
1009                         else:  # Assume attribute access
1010                             idx = stack.pop()
1011                             assert isinstance(idx, int)
1012                             obj = stack.pop()
1013                             assert isinstance(obj, list)
1014                             stack.append(obj[idx])
1015                     elif opcode == 128:  # coerce
1016                         u30(coder)
1017                     elif opcode == 133:  # coerce_s
1018                         assert isinstance(stack[-1], (type(None), compat_str))
1019                     elif opcode == 164:  # modulo
1020                         value2 = stack.pop()
1021                         value1 = stack.pop()
1022                         res = value1 % value2
1023                         stack.append(res)
1024                     elif opcode == 208:  # getlocal_0
1025                         stack.append(registers[0])
1026                     elif opcode == 209:  # getlocal_1
1027                         stack.append(registers[1])
1028                     elif opcode == 210:  # getlocal_2
1029                         stack.append(registers[2])
1030                     elif opcode == 211:  # getlocal_3
1031                         stack.append(registers[3])
1032                     elif opcode == 214:  # setlocal_2
1033                         registers[2] = stack.pop()
1034                     elif opcode == 215:  # setlocal_3
1035                         registers[3] = stack.pop()
1036                     else:
1037                         raise NotImplementedError(
1038                             u'Unsupported opcode %d' % opcode)
1039
1040             method_pyfunctions[func_name] = resfunc
1041             return resfunc
1042
1043         initial_function = extract_function(u'decipher')
1044         return lambda s: initial_function([s])
1045
1046     def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
1047         """Turn the encrypted s field into a working signature"""
1048
1049         if player_url is not None:
1050             try:
1051                 if player_url not in self._player_cache:
1052                     func = self._extract_signature_function(
1053                         video_id, player_url, len(s)
1054                     )
1055                     self._player_cache[player_url] = func
1056                 func = self._player_cache[player_url]
1057                 if self._downloader.params.get('youtube_print_sig_code'):
1058                     self._print_sig_code(func, len(s))
1059                 return func(s)
1060             except Exception:
1061                 tb = traceback.format_exc()
1062                 self._downloader.report_warning(
1063                     u'Automatic signature extraction failed: ' + tb)
1064
1065         self._downloader.report_warning(
1066             u'Warning: Falling back to static signature algorithm')
1067         return self._static_decrypt_signature(
1068             s, video_id, player_url, age_gate)
1069
1070     def _static_decrypt_signature(self, s, video_id, player_url, age_gate):
1071         if age_gate:
1072             # The videos with age protection use another player, so the
1073             # algorithms can be different.
1074             if len(s) == 86:
1075                 return s[2:63] + s[82] + s[64:82] + s[63]
1076
1077         if len(s) == 93:
1078             return s[86:29:-1] + s[88] + s[28:5:-1]
1079         elif len(s) == 92:
1080             return s[25] + s[3:25] + s[0] + s[26:42] + s[79] + s[43:79] + s[91] + s[80:83]
1081         elif len(s) == 91:
1082             return s[84:27:-1] + s[86] + s[26:5:-1]
1083         elif len(s) == 90:
1084             return s[25] + s[3:25] + s[2] + s[26:40] + s[77] + s[41:77] + s[89] + s[78:81]
1085         elif len(s) == 89:
1086             return s[84:78:-1] + s[87] + s[77:60:-1] + s[0] + s[59:3:-1]
1087         elif len(s) == 88:
1088             return s[7:28] + s[87] + s[29:45] + s[55] + s[46:55] + s[2] + s[56:87] + s[28]
1089         elif len(s) == 87:
1090             return s[6:27] + s[4] + s[28:39] + s[27] + s[40:59] + s[2] + s[60:]
1091         elif len(s) == 86:
1092             return s[5:34] + s[0] + s[35:38] + s[3] + s[39:45] + s[38] + s[46:53] + s[73] + s[54:73] + s[85] + s[74:85] + s[53]
1093         elif len(s) == 85:
1094             return s[3:11] + s[0] + s[12:55] + s[84] + s[56:84]
1095         elif len(s) == 84:
1096             return s[81:36:-1] + s[0] + s[35:2:-1]
1097         elif len(s) == 83:
1098             return s[81:64:-1] + s[82] + s[63:52:-1] + s[45] + s[51:45:-1] + s[1] + s[44:1:-1] + s[0]
1099         elif len(s) == 82:
1100             return s[80:73:-1] + s[81] + s[72:54:-1] + s[2] + s[53:43:-1] + s[0] + s[42:2:-1] + s[43] + s[1] + s[54]
1101         elif len(s) == 81:
1102             return s[56] + s[79:56:-1] + s[41] + s[55:41:-1] + s[80] + s[40:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
1103         elif len(s) == 80:
1104             return s[1:19] + s[0] + s[20:68] + s[19] + s[69:80]
1105         elif len(s) == 79:
1106             return s[54] + s[77:54:-1] + s[39] + s[53:39:-1] + s[78] + s[38:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
1107
1108         else:
1109             raise ExtractorError(u'Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s)))
1110
1111     def _decrypt_signature_age_gate(self, s):
1112         # The videos with age protection use another player, so the algorithms
1113         # can be different.
1114         if len(s) == 86:
1115             return s[2:63] + s[82] + s[64:82] + s[63]
1116         else:
1117             # Fallback to the other algortihms
1118             return self._decrypt_signature(s)
1119
1120     def _get_available_subtitles(self, video_id):
1121         try:
1122             sub_list = self._download_webpage(
1123                 'http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
1124                 video_id, note=False)
1125         except ExtractorError as err:
1126             self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err))
1127             return {}
1128         lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list)
1129
1130         sub_lang_list = {}
1131         for l in lang_list:
1132             lang = l[1]
1133             params = compat_urllib_parse.urlencode({
1134                 'lang': lang,
1135                 'v': video_id,
1136                 'fmt': self._downloader.params.get('subtitlesformat'),
1137             })
1138             url = u'http://www.youtube.com/api/timedtext?' + params
1139             sub_lang_list[lang] = url
1140         if not sub_lang_list:
1141             self._downloader.report_warning(u'video doesn\'t have subtitles')
1142             return {}
1143         return sub_lang_list
1144
1145     def _get_available_automatic_caption(self, video_id, webpage):
1146         """We need the webpage for getting the captions url, pass it as an
1147            argument to speed up the process."""
1148         sub_format = self._downloader.params.get('subtitlesformat')
1149         self.to_screen(u'%s: Looking for automatic captions' % video_id)
1150         mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
1151         err_msg = u'Couldn\'t find automatic captions for %s' % video_id
1152         if mobj is None:
1153             self._downloader.report_warning(err_msg)
1154             return {}
1155         player_config = json.loads(mobj.group(1))
1156         try:
1157             args = player_config[u'args']
1158             caption_url = args[u'ttsurl']
1159             timestamp = args[u'timestamp']
1160             # We get the available subtitles
1161             list_params = compat_urllib_parse.urlencode({
1162                 'type': 'list',
1163                 'tlangs': 1,
1164                 'asrs': 1,
1165             })
1166             list_url = caption_url + '&' + list_params
1167             list_page = self._download_webpage(list_url, video_id)
1168             caption_list = xml.etree.ElementTree.fromstring(list_page.encode('utf-8'))
1169             original_lang_node = caption_list.find('track')
1170             if original_lang_node.attrib.get('kind') != 'asr' :
1171                 self._downloader.report_warning(u'Video doesn\'t have automatic captions')
1172                 return {}
1173             original_lang = original_lang_node.attrib['lang_code']
1174
1175             sub_lang_list = {}
1176             for lang_node in caption_list.findall('target'):
1177                 sub_lang = lang_node.attrib['lang_code']
1178                 params = compat_urllib_parse.urlencode({
1179                     'lang': original_lang,
1180                     'tlang': sub_lang,
1181                     'fmt': sub_format,
1182                     'ts': timestamp,
1183                     'kind': 'asr',
1184                 })
1185                 sub_lang_list[sub_lang] = caption_url + '&' + params
1186             return sub_lang_list
1187         # An extractor error can be raise by the download process if there are
1188         # no automatic captions but there are subtitles
1189         except (KeyError, ExtractorError):
1190             self._downloader.report_warning(err_msg)
1191             return {}
1192
1193     def _print_formats(self, formats):
1194         print('Available formats:')
1195         for x in formats:
1196             print('%s\t:\t%s\t[%s]%s' %(x, self._video_extensions.get(x, 'flv'),
1197                                         self._video_dimensions.get(x, '???'),
1198                                         ' ('+self._special_itags[x]+')' if x in self._special_itags else ''))
1199
1200     def _extract_id(self, url):
1201         mobj = re.match(self._VALID_URL, url, re.VERBOSE)
1202         if mobj is None:
1203             raise ExtractorError(u'Invalid URL: %s' % url)
1204         video_id = mobj.group(2)
1205         return video_id
1206
1207     def _get_video_url_list(self, url_map):
1208         """
1209         Transform a dictionary in the format {itag:url} to a list of (itag, url)
1210         with the requested formats.
1211         """
1212         req_format = self._downloader.params.get('format', None)
1213         format_limit = self._downloader.params.get('format_limit', None)
1214         available_formats = self._available_formats_prefer_free if self._downloader.params.get('prefer_free_formats', False) else self._available_formats
1215         if format_limit is not None and format_limit in available_formats:
1216             format_list = available_formats[available_formats.index(format_limit):]
1217         else:
1218             format_list = available_formats
1219         existing_formats = [x for x in format_list if x in url_map]
1220         if len(existing_formats) == 0:
1221             raise ExtractorError(u'no known formats available for video')
1222         if self._downloader.params.get('listformats', None):
1223             self._print_formats(existing_formats)
1224             return
1225         if req_format is None or req_format == 'best':
1226             video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
1227         elif req_format == 'worst':
1228             video_url_list = [(existing_formats[-1], url_map[existing_formats[-1]])] # worst quality
1229         elif req_format in ('-1', 'all'):
1230             video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
1231         else:
1232             # Specific formats. We pick the first in a slash-delimeted sequence.
1233             # Format can be specified as itag or 'mp4' or 'flv' etc. We pick the highest quality
1234             # available in the specified format. For example,
1235             # if '1/2/3/4' is requested and '2' and '4' are available, we pick '2'.
1236             # if '1/mp4/3/4' is requested and '1' and '5' (is a mp4) are available, we pick '1'.
1237             # if '1/mp4/3/4' is requested and '4' and '5' (is a mp4) are available, we pick '5'.
1238             req_formats = req_format.split('/')
1239             video_url_list = None
1240             for rf in req_formats:
1241                 if rf in url_map:
1242                     video_url_list = [(rf, url_map[rf])]
1243                     break
1244                 if rf in self._video_formats_map:
1245                     for srf in self._video_formats_map[rf]:
1246                         if srf in url_map:
1247                             video_url_list = [(srf, url_map[srf])]
1248                             break
1249                     else:
1250                         continue
1251                     break
1252             if video_url_list is None:
1253                 raise ExtractorError(u'requested format not available')
1254         return video_url_list
1255
1256     def _extract_from_m3u8(self, manifest_url, video_id):
1257         url_map = {}
1258         def _get_urls(_manifest):
1259             lines = _manifest.split('\n')
1260             urls = filter(lambda l: l and not l.startswith('#'),
1261                             lines)
1262             return urls
1263         manifest = self._download_webpage(manifest_url, video_id, u'Downloading formats manifest')
1264         formats_urls = _get_urls(manifest)
1265         for format_url in formats_urls:
1266             itag = self._search_regex(r'itag/(\d+?)/', format_url, 'itag')
1267             url_map[itag] = format_url
1268         return url_map
1269
1270     def _real_extract(self, url):
1271         if re.match(r'(?:https?://)?[^/]+/watch\?feature=[a-z_]+$', url):
1272             self._downloader.report_warning(u'Did you forget to quote the URL? Remember that & is a meta-character in most shells, so you want to put the URL in quotes, like  youtube-dl \'http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc\' (or simply  youtube-dl BaW_jenozKc  ).')
1273
1274         # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1275         mobj = re.search(self._NEXT_URL_RE, url)
1276         if mobj:
1277             url = 'https://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/')
1278         video_id = self._extract_id(url)
1279
1280         # Get video webpage
1281         self.report_video_webpage_download(video_id)
1282         url = 'https://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
1283         request = compat_urllib_request.Request(url)
1284         try:
1285             video_webpage_bytes = compat_urllib_request.urlopen(request).read()
1286         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1287             raise ExtractorError(u'Unable to download video webpage: %s' % compat_str(err))
1288
1289         video_webpage = video_webpage_bytes.decode('utf-8', 'ignore')
1290
1291         # Attempt to extract SWF player URL
1292         mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
1293         if mobj is not None:
1294             player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1295         else:
1296             player_url = None
1297
1298         # Get video info
1299         self.report_video_info_webpage_download(video_id)
1300         if re.search(r'player-age-gate-content">', video_webpage) is not None:
1301             self.report_age_confirmation()
1302             age_gate = True
1303             # We simulate the access to the video from www.youtube.com/v/{video_id}
1304             # this can be viewed without login into Youtube
1305             data = compat_urllib_parse.urlencode({'video_id': video_id,
1306                                                   'el': 'embedded',
1307                                                   'gl': 'US',
1308                                                   'hl': 'en',
1309                                                   'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1310                                                   'asv': 3,
1311                                                   'sts':'1588',
1312                                                   })
1313             video_info_url = 'https://www.youtube.com/get_video_info?' + data
1314             video_info_webpage = self._download_webpage(video_info_url, video_id,
1315                                     note=False,
1316                                     errnote='unable to download video info webpage')
1317             video_info = compat_parse_qs(video_info_webpage)
1318         else:
1319             age_gate = False
1320             for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
1321                 video_info_url = ('https://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
1322                         % (video_id, el_type))
1323                 video_info_webpage = self._download_webpage(video_info_url, video_id,
1324                                         note=False,
1325                                         errnote='unable to download video info webpage')
1326                 video_info = compat_parse_qs(video_info_webpage)
1327                 if 'token' in video_info:
1328                     break
1329         if 'token' not in video_info:
1330             if 'reason' in video_info:
1331                 raise ExtractorError(u'YouTube said: %s' % video_info['reason'][0], expected=True)
1332             else:
1333                 raise ExtractorError(u'"token" parameter not in video info for unknown reason')
1334
1335         # Check for "rental" videos
1336         if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
1337             raise ExtractorError(u'"rental" videos not supported')
1338
1339         # Start extracting information
1340         self.report_information_extraction(video_id)
1341
1342         # uploader
1343         if 'author' not in video_info:
1344             raise ExtractorError(u'Unable to extract uploader name')
1345         video_uploader = compat_urllib_parse.unquote_plus(video_info['author'][0])
1346
1347         # uploader_id
1348         video_uploader_id = None
1349         mobj = re.search(r'<link itemprop="url" href="http://www.youtube.com/(?:user|channel)/([^"]+)">', video_webpage)
1350         if mobj is not None:
1351             video_uploader_id = mobj.group(1)
1352         else:
1353             self._downloader.report_warning(u'unable to extract uploader nickname')
1354
1355         # title
1356         if 'title' not in video_info:
1357             raise ExtractorError(u'Unable to extract video title')
1358         video_title = compat_urllib_parse.unquote_plus(video_info['title'][0])
1359
1360         # thumbnail image
1361         # We try first to get a high quality image:
1362         m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
1363                             video_webpage, re.DOTALL)
1364         if m_thumb is not None:
1365             video_thumbnail = m_thumb.group(1)
1366         elif 'thumbnail_url' not in video_info:
1367             self._downloader.report_warning(u'unable to extract video thumbnail')
1368             video_thumbnail = ''
1369         else:   # don't panic if we can't find it
1370             video_thumbnail = compat_urllib_parse.unquote_plus(video_info['thumbnail_url'][0])
1371
1372         # upload date
1373         upload_date = None
1374         mobj = re.search(r'id="eow-date.*?>(.*?)</span>', video_webpage, re.DOTALL)
1375         if mobj is not None:
1376             upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
1377             upload_date = unified_strdate(upload_date)
1378
1379         # description
1380         video_description = get_element_by_id("eow-description", video_webpage)
1381         if video_description:
1382             video_description = clean_html(video_description)
1383         else:
1384             fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
1385             if fd_mobj:
1386                 video_description = unescapeHTML(fd_mobj.group(1))
1387             else:
1388                 video_description = u''
1389
1390         # subtitles
1391         video_subtitles = self.extract_subtitles(video_id, video_webpage)
1392
1393         if self._downloader.params.get('listsubtitles', False):
1394             self._list_available_subtitles(video_id, video_webpage)
1395             return
1396
1397         if 'length_seconds' not in video_info:
1398             self._downloader.report_warning(u'unable to extract video duration')
1399             video_duration = ''
1400         else:
1401             video_duration = compat_urllib_parse.unquote_plus(video_info['length_seconds'][0])
1402
1403         # Decide which formats to download
1404
1405         try:
1406             mobj = re.search(r';ytplayer.config = ({.*?});', video_webpage)
1407             if not mobj:
1408                 raise ValueError('Could not find vevo ID')
1409             info = json.loads(mobj.group(1))
1410             args = info['args']
1411             # Easy way to know if the 's' value is in url_encoded_fmt_stream_map
1412             # this signatures are encrypted
1413             m_s = re.search(r'[&,]s=', args['url_encoded_fmt_stream_map'])
1414             if m_s is not None:
1415                 self.to_screen(u'%s: Encrypted signatures detected.' % video_id)
1416                 video_info['url_encoded_fmt_stream_map'] = [args['url_encoded_fmt_stream_map']]
1417             m_s = re.search(r'[&,]s=', args.get('adaptive_fmts', u''))
1418             if m_s is not None:
1419                 if 'url_encoded_fmt_stream_map' in video_info:
1420                     video_info['url_encoded_fmt_stream_map'][0] += ',' + args['adaptive_fmts']
1421                 else:
1422                     video_info['url_encoded_fmt_stream_map'] = [args['adaptive_fmts']]
1423             elif 'adaptive_fmts' in video_info:
1424                 if 'url_encoded_fmt_stream_map' in video_info:
1425                     video_info['url_encoded_fmt_stream_map'][0] += ',' + video_info['adaptive_fmts'][0]
1426                 else:
1427                     video_info['url_encoded_fmt_stream_map'] = video_info['adaptive_fmts']
1428         except ValueError:
1429             pass
1430
1431         if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1432             self.report_rtmp_download()
1433             video_url_list = [(None, video_info['conn'][0])]
1434         elif 'url_encoded_fmt_stream_map' in video_info and len(video_info['url_encoded_fmt_stream_map']) >= 1:
1435             if 'rtmpe%3Dyes' in video_info['url_encoded_fmt_stream_map'][0]:
1436                 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
1437             url_map = {}
1438             for url_data_str in video_info['url_encoded_fmt_stream_map'][0].split(','):
1439                 url_data = compat_parse_qs(url_data_str)
1440                 if 'itag' in url_data and 'url' in url_data:
1441                     url = url_data['url'][0]
1442                     if 'sig' in url_data:
1443                         url += '&signature=' + url_data['sig'][0]
1444                     elif 's' in url_data:
1445                         encrypted_sig = url_data['s'][0]
1446                         if self._downloader.params.get('verbose'):
1447                             if age_gate:
1448                                 player_version = self._search_regex(
1449                                     r'-(.+)\.swf$',
1450                                     player_url if player_url else None,
1451                                     'flash player', fatal=False)
1452                                 player_desc = 'flash player %s' % player_version
1453                             else:
1454                                 player_version = self._search_regex(
1455                                     r'html5player-(.+?)\.js', video_webpage,
1456                                     'html5 player', fatal=False)
1457                                 player_desc = u'html5 player %s' % player_version
1458
1459                             parts_sizes = u'.'.join(compat_str(len(part)) for part in encrypted_sig.split('.'))
1460                             self.to_screen(u'encrypted signature length %d (%s), itag %s, %s' %
1461                                 (len(encrypted_sig), parts_sizes, url_data['itag'][0], player_desc))
1462
1463                         if not age_gate:
1464                             jsplayer_url_json = self._search_regex(
1465                                 r'"assets":.+?"js":\s*("[^"]+")',
1466                                 video_webpage, u'JS player URL')
1467                             player_url = json.loads(jsplayer_url_json)
1468
1469                         signature = self._decrypt_signature(
1470                             encrypted_sig, video_id, player_url, age_gate)
1471                         url += '&signature=' + signature
1472                     if 'ratebypass' not in url:
1473                         url += '&ratebypass=yes'
1474                     url_map[url_data['itag'][0]] = url
1475             video_url_list = self._get_video_url_list(url_map)
1476             if not video_url_list:
1477                 return
1478         elif video_info.get('hlsvp'):
1479             manifest_url = video_info['hlsvp'][0]
1480             url_map = self._extract_from_m3u8(manifest_url, video_id)
1481             video_url_list = self._get_video_url_list(url_map)
1482             if not video_url_list:
1483                 return
1484
1485         else:
1486             raise ExtractorError(u'no conn or url_encoded_fmt_stream_map information found in video info')
1487
1488         results = []
1489         for format_param, video_real_url in video_url_list:
1490             # Extension
1491             video_extension = self._video_extensions.get(format_param, 'flv')
1492
1493             video_format = '{0} - {1}{2}'.format(format_param if format_param else video_extension,
1494                                               self._video_dimensions.get(format_param, '???'),
1495                                               ' ('+self._special_itags[format_param]+')' if format_param in self._special_itags else '')
1496
1497             results.append({
1498                 'id':       video_id,
1499                 'url':      video_real_url,
1500                 'uploader': video_uploader,
1501                 'uploader_id': video_uploader_id,
1502                 'upload_date':  upload_date,
1503                 'title':    video_title,
1504                 'ext':      video_extension,
1505                 'format':   video_format,
1506                 'thumbnail':    video_thumbnail,
1507                 'description':  video_description,
1508                 'player_url':   player_url,
1509                 'subtitles':    video_subtitles,
1510                 'duration':     video_duration
1511             })
1512         return results
1513
1514 class YoutubePlaylistIE(InfoExtractor):
1515     IE_DESC = u'YouTube.com playlists'
1516     _VALID_URL = r"""(?:
1517                         (?:https?://)?
1518                         (?:\w+\.)?
1519                         youtube\.com/
1520                         (?:
1521                            (?:course|view_play_list|my_playlists|artist|playlist|watch)
1522                            \? (?:.*?&)*? (?:p|a|list)=
1523                         |  p/
1524                         )
1525                         ((?:PL|EC|UU|FL)?[0-9A-Za-z-_]{10,})
1526                         .*
1527                      |
1528                         ((?:PL|EC|UU|FL)[0-9A-Za-z-_]{10,})
1529                      )"""
1530     _TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/playlists/%s?max-results=%i&start-index=%i&v=2&alt=json&safeSearch=none'
1531     _MAX_RESULTS = 50
1532     IE_NAME = u'youtube:playlist'
1533
1534     @classmethod
1535     def suitable(cls, url):
1536         """Receives a URL and returns True if suitable for this IE."""
1537         return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
1538
1539     def _real_extract(self, url):
1540         # Extract playlist id
1541         mobj = re.match(self._VALID_URL, url, re.VERBOSE)
1542         if mobj is None:
1543             raise ExtractorError(u'Invalid URL: %s' % url)
1544
1545         # Download playlist videos from API
1546         playlist_id = mobj.group(1) or mobj.group(2)
1547         videos = []
1548
1549         for page_num in itertools.count(1):
1550             start_index = self._MAX_RESULTS * (page_num - 1) + 1
1551             if start_index >= 1000:
1552                 self._downloader.report_warning(u'Max number of results reached')
1553                 break
1554             url = self._TEMPLATE_URL % (playlist_id, self._MAX_RESULTS, start_index)
1555             page = self._download_webpage(url, playlist_id, u'Downloading page #%s' % page_num)
1556
1557             try:
1558                 response = json.loads(page)
1559             except ValueError as err:
1560                 raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))
1561
1562             if 'feed' not in response:
1563                 raise ExtractorError(u'Got a malformed response from YouTube API')
1564             playlist_title = response['feed']['title']['$t']
1565             if 'entry' not in response['feed']:
1566                 # Number of videos is a multiple of self._MAX_RESULTS
1567                 break
1568
1569             for entry in response['feed']['entry']:
1570                 index = entry['yt$position']['$t']
1571                 if 'media$group' in entry and 'yt$videoid' in entry['media$group']:
1572                     videos.append((
1573                         index,
1574                         'https://www.youtube.com/watch?v=' + entry['media$group']['yt$videoid']['$t']
1575                     ))
1576
1577         videos = [v[1] for v in sorted(videos)]
1578
1579         url_results = [self.url_result(vurl, 'Youtube') for vurl in videos]
1580         return [self.playlist_result(url_results, playlist_id, playlist_title)]
1581
1582
1583 class YoutubeChannelIE(InfoExtractor):
1584     IE_DESC = u'YouTube.com channels'
1585     _VALID_URL = r"^(?:https?://)?(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/([0-9A-Za-z_-]+)"
1586     _TEMPLATE_URL = 'http://www.youtube.com/channel/%s/videos?sort=da&flow=list&view=0&page=%s&gl=US&hl=en'
1587     _MORE_PAGES_INDICATOR = 'yt-uix-load-more'
1588     _MORE_PAGES_URL = 'http://www.youtube.com/c4_browse_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s'
1589     IE_NAME = u'youtube:channel'
1590
1591     def extract_videos_from_page(self, page):
1592         ids_in_page = []
1593         for mobj in re.finditer(r'href="/watch\?v=([0-9A-Za-z_-]+)&?', page):
1594             if mobj.group(1) not in ids_in_page:
1595                 ids_in_page.append(mobj.group(1))
1596         return ids_in_page
1597
1598     def _real_extract(self, url):
1599         # Extract channel id
1600         mobj = re.match(self._VALID_URL, url)
1601         if mobj is None:
1602             raise ExtractorError(u'Invalid URL: %s' % url)
1603
1604         # Download channel page
1605         channel_id = mobj.group(1)
1606         video_ids = []
1607         pagenum = 1
1608
1609         url = self._TEMPLATE_URL % (channel_id, pagenum)
1610         page = self._download_webpage(url, channel_id,
1611                                       u'Downloading page #%s' % pagenum)
1612
1613         # Extract video identifiers
1614         ids_in_page = self.extract_videos_from_page(page)
1615         video_ids.extend(ids_in_page)
1616
1617         # Download any subsequent channel pages using the json-based channel_ajax query
1618         if self._MORE_PAGES_INDICATOR in page:
1619             for pagenum in itertools.count(1):
1620                 url = self._MORE_PAGES_URL % (pagenum, channel_id)
1621                 page = self._download_webpage(url, channel_id,
1622                                               u'Downloading page #%s' % pagenum)
1623
1624                 page = json.loads(page)
1625
1626                 ids_in_page = self.extract_videos_from_page(page['content_html'])
1627                 video_ids.extend(ids_in_page)
1628
1629                 if self._MORE_PAGES_INDICATOR  not in page['load_more_widget_html']:
1630                     break
1631
1632         self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))
1633
1634         urls = ['http://www.youtube.com/watch?v=%s' % id for id in video_ids]
1635         url_entries = [self.url_result(eurl, 'Youtube') for eurl in urls]
1636         return [self.playlist_result(url_entries, channel_id)]
1637
1638
1639 class YoutubeUserIE(InfoExtractor):
1640     IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)'
1641     _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?)|ytuser:)(?!feed/)([A-Za-z0-9_-]+)'
1642     _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
1643     _GDATA_PAGE_SIZE = 50
1644     _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'
1645     IE_NAME = u'youtube:user'
1646
1647     @classmethod
1648     def suitable(cls, url):
1649         # Don't return True if the url can be extracted with other youtube
1650         # extractor, the regex would is too permissive and it would match.
1651         other_ies = iter(klass for (name, klass) in globals().items() if name.endswith('IE') and klass is not cls)
1652         if any(ie.suitable(url) for ie in other_ies): return False
1653         else: return super(YoutubeUserIE, cls).suitable(url)
1654
1655     def _real_extract(self, url):
1656         # Extract username
1657         mobj = re.match(self._VALID_URL, url)
1658         if mobj is None:
1659             raise ExtractorError(u'Invalid URL: %s' % url)
1660
1661         username = mobj.group(1)
1662
1663         # Download video ids using YouTube Data API. Result size per
1664         # query is limited (currently to 50 videos) so we need to query
1665         # page by page until there are no video ids - it means we got
1666         # all of them.
1667
1668         video_ids = []
1669
1670         for pagenum in itertools.count(0):
1671             start_index = pagenum * self._GDATA_PAGE_SIZE + 1
1672
1673             gdata_url = self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index)
1674             page = self._download_webpage(gdata_url, username,
1675                                           u'Downloading video ids from %d to %d' % (start_index, start_index + self._GDATA_PAGE_SIZE))
1676
1677             try:
1678                 response = json.loads(page)
1679             except ValueError as err:
1680                 raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))
1681             if 'entry' not in response['feed']:
1682                 # Number of videos is a multiple of self._MAX_RESULTS
1683                 break
1684
1685             # Extract video identifiers
1686             ids_in_page = []
1687             for entry in response['feed']['entry']:
1688                 ids_in_page.append(entry['id']['$t'].split('/')[-1])
1689             video_ids.extend(ids_in_page)
1690
1691             # A little optimization - if current page is not
1692             # "full", ie. does not contain PAGE_SIZE video ids then
1693             # we can assume that this page is the last one - there
1694             # are no more ids on further pages - no need to query
1695             # again.
1696
1697             if len(ids_in_page) < self._GDATA_PAGE_SIZE:
1698                 break
1699
1700         urls = ['http://www.youtube.com/watch?v=%s' % video_id for video_id in video_ids]
1701         url_results = [self.url_result(rurl, 'Youtube') for rurl in urls]
1702         return [self.playlist_result(url_results, playlist_title = username)]
1703
1704 class YoutubeSearchIE(SearchInfoExtractor):
1705     IE_DESC = u'YouTube.com searches'
1706     _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc'
1707     _MAX_RESULTS = 1000
1708     IE_NAME = u'youtube:search'
1709     _SEARCH_KEY = 'ytsearch'
1710
1711     def report_download_page(self, query, pagenum):
1712         """Report attempt to download search page with given number."""
1713         self._downloader.to_screen(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))
1714
1715     def _get_n_results(self, query, n):
1716         """Get a specified number of results for a query"""
1717
1718         video_ids = []
1719         pagenum = 0
1720         limit = n
1721
1722         while (50 * pagenum) < limit:
1723             self.report_download_page(query, pagenum+1)
1724             result_url = self._API_URL % (compat_urllib_parse.quote_plus(query), (50*pagenum)+1)
1725             request = compat_urllib_request.Request(result_url)
1726             try:
1727                 data = compat_urllib_request.urlopen(request).read().decode('utf-8')
1728             except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1729                 raise ExtractorError(u'Unable to download API page: %s' % compat_str(err))
1730             api_response = json.loads(data)['data']
1731
1732             if not 'items' in api_response:
1733                 raise ExtractorError(u'[youtube] No video results')
1734
1735             new_ids = list(video['id'] for video in api_response['items'])
1736             video_ids += new_ids
1737
1738             limit = min(n, api_response['totalItems'])
1739             pagenum += 1
1740
1741         if len(video_ids) > n:
1742             video_ids = video_ids[:n]
1743         videos = [self.url_result('http://www.youtube.com/watch?v=%s' % id, 'Youtube') for id in video_ids]
1744         return self.playlist_result(videos, query)
1745
1746
1747 class YoutubeShowIE(InfoExtractor):
1748     IE_DESC = u'YouTube.com (multi-season) shows'
1749     _VALID_URL = r'https?://www\.youtube\.com/show/(.*)'
1750     IE_NAME = u'youtube:show'
1751
1752     def _real_extract(self, url):
1753         mobj = re.match(self._VALID_URL, url)
1754         show_name = mobj.group(1)
1755         webpage = self._download_webpage(url, show_name, u'Downloading show webpage')
1756         # There's one playlist for each season of the show
1757         m_seasons = list(re.finditer(r'href="(/playlist\?list=.*?)"', webpage))
1758         self.to_screen(u'%s: Found %s seasons' % (show_name, len(m_seasons)))
1759         return [self.url_result('https://www.youtube.com' + season.group(1), 'YoutubePlaylist') for season in m_seasons]
1760
1761
1762 class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
1763     """
1764     Base class for extractors that fetch info from
1765     http://www.youtube.com/feed_ajax
1766     Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
1767     """
1768     _LOGIN_REQUIRED = True
1769     _PAGING_STEP = 30
1770     # use action_load_personal_feed instead of action_load_system_feed
1771     _PERSONAL_FEED = False
1772
1773     @property
1774     def _FEED_TEMPLATE(self):
1775         action = 'action_load_system_feed'
1776         if self._PERSONAL_FEED:
1777             action = 'action_load_personal_feed'
1778         return 'http://www.youtube.com/feed_ajax?%s=1&feed_name=%s&paging=%%s' % (action, self._FEED_NAME)
1779
1780     @property
1781     def IE_NAME(self):
1782         return u'youtube:%s' % self._FEED_NAME
1783
1784     def _real_initialize(self):
1785         self._login()
1786
1787     def _real_extract(self, url):
1788         feed_entries = []
1789         # The step argument is available only in 2.7 or higher
1790         for i in itertools.count(0):
1791             paging = i*self._PAGING_STEP
1792             info = self._download_webpage(self._FEED_TEMPLATE % paging,
1793                                           u'%s feed' % self._FEED_NAME,
1794                                           u'Downloading page %s' % i)
1795             info = json.loads(info)
1796             feed_html = info['feed_html']
1797             m_ids = re.finditer(r'"/watch\?v=(.*?)["&]', feed_html)
1798             ids = orderedSet(m.group(1) for m in m_ids)
1799             feed_entries.extend(self.url_result(id, 'Youtube') for id in ids)
1800             if info['paging'] is None:
1801                 break
1802         return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE)
1803
1804 class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
1805     IE_DESC = u'YouTube.com subscriptions feed, "ytsubs" keyword(requires authentication)'
1806     _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
1807     _FEED_NAME = 'subscriptions'
1808     _PLAYLIST_TITLE = u'Youtube Subscriptions'
1809
1810 class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
1811     IE_DESC = u'YouTube.com recommended videos, "ytrec" keyword (requires authentication)'
1812     _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?'
1813     _FEED_NAME = 'recommended'
1814     _PLAYLIST_TITLE = u'Youtube Recommended videos'
1815
1816 class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor):
1817     IE_DESC = u'Youtube watch later list, "ytwatchlater" keyword (requires authentication)'
1818     _VALID_URL = r'https?://www\.youtube\.com/feed/watch_later|:ytwatchlater'
1819     _FEED_NAME = 'watch_later'
1820     _PLAYLIST_TITLE = u'Youtube Watch Later'
1821     _PAGING_STEP = 100
1822     _PERSONAL_FEED = True
1823
1824 class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
1825     IE_NAME = u'youtube:favorites'
1826     IE_DESC = u'YouTube.com favourite videos, "ytfav" keyword (requires authentication)'
1827     _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
1828     _LOGIN_REQUIRED = True
1829
1830     def _real_extract(self, url):
1831         webpage = self._download_webpage('https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
1832         playlist_id = self._search_regex(r'list=(.+?)["&]', webpage, u'favourites playlist id')
1833         return self.url_result(playlist_id, 'YoutubePlaylist')