14 import xml.etree.ElementTree
17 from .common import InfoExtractor, SearchInfoExtractor
18 from .subtitles import SubtitlesInfoExtractor
25 compat_urllib_request,
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    # Endpoints used during extractor initialization (login / language / age gate).
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _LANG_URL = r'https://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
    _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
    # Machine name looked up in the user's .netrc file for credentials.
    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    def report_lang(self):
        """Report attempt to set language."""
        self.to_screen(u'Setting language')

    def _set_language(self):
        # Hit the language-selection URL so later pages come back in English;
        # a network failure only produces a warning (best-effort behaviour).
        request = compat_urllib_request.Request(self._LANG_URL)
        compat_urllib_request.urlopen(request).read()
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self._downloader.report_warning(u'unable to set language: %s' % compat_str(err))

        # --- login flow (credentials from CLI or .netrc, see _NETRC_MACHINE) ---
        (username, password) = self._get_login_info()
        # No authentication to be performed
        if self._LOGIN_REQUIRED:
            raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True)

        # Fetch the sign-in page to pick up the anti-forgery form tokens.
        request = compat_urllib_request.Request(self._LOGIN_URL)
        login_page = compat_urllib_request.urlopen(request).read().decode('utf-8')
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self._downloader.report_warning(u'unable to fetch login page: %s' % compat_str(err))

        # Hidden inputs GALX and dsh must be echoed back in the login POST.
        match = re.search(re.compile(r'<input.+?name="GALX".+?value="(.+?)"', re.DOTALL), login_page)
        match = re.search(re.compile(r'<input.+?name="dsh".+?value="(.+?)"', re.DOTALL), login_page)

        # Fields of the Google sign-in form POSTed to _LOGIN_URL.
            u'continue': u'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
            u'PersistentCookie': u'yes',
            u'bgresponse': u'js_disabled',
            u'checkConnection': u'',
            u'checkedDomains': u'youtube',
            u'signIn': u'Sign in',
            u'service': u'youtube',
        # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
        login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k,v in login_form_strs.items())
        login_data = compat_urllib_parse.urlencode(login_form).encode('ascii')
        request = compat_urllib_request.Request(self._LOGIN_URL, login_data)
        login_results = compat_urllib_request.urlopen(request).read().decode('utf-8')
        # If the login form is still present in the response, the credentials
        # were rejected.
        if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
            self._downloader.report_warning(u'unable to log in: bad username or password')
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self._downloader.report_warning(u'unable to log in: %s' % compat_str(err))

    def _confirm_age(self):
        # POST the confirmation form to the age-verification endpoint; unlike
        # language/login, a failure here raises (the video needs the cookie).
            'action_confirm': 'Confirm',
        request = compat_urllib_request.Request(self._AGE_URL, compat_urllib_parse.urlencode(age_form))
        self.report_age_confirmation()
        compat_urllib_request.urlopen(request).read().decode('utf-8')
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            raise ExtractorError(u'Unable to confirm age: %s' % compat_str(err))

    def _real_initialize(self):
        # Standard InfoExtractor hook: bail out early when a step fails.
        # NOTE(review): presumably each failed step returns/aborts here —
        # the guarded bodies are not visible in this copy.
        if self._downloader is None:
        if not self._set_language():
        if not self._login():
class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
    IE_DESC = u'YouTube.com'
    # Verbose-mode URL regex; the 11-character video ID is captured by the
    # ([0-9A-Za-z_-]{11}) group below (consumed by _extract_id).
        (?:https?://)?                                       # http(s):// (optional)
        (?:(?:(?:(?:\w+\.)?youtube(?:-nocookie)?\.com/|
           tube\.majestyc\.net/|
           youtube\.googleapis\.com/)                        # the various hostnames, with wildcard subdomains
         (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
         (?:                                                  # the various things that can precede the ID:
             (?:(?:v|embed|e)/)                               # v/ or embed/ or e/
             |(?:                                             # or the v= param in all its forms
                 (?:(?:watch|movie)(?:_popup)?(?:\.php)?)?    # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
                 (?:\?|\#!?)                                  # the params delimiter ? or # or #!
                 (?:.*?&)?                                    # any other preceding param (like /?s=tuff&v=xxxx)
         |youtu\.be/                                          # just youtu.be/xxxx
         )?                                                   # all until now is optional -> you can pass the naked ID
        ([0-9A-Za-z_-]{11})                                   # here is it! the YouTube video ID
        (?(1).+)?                                             # if we found the ID, everything can follow
    # Matches a next_url= redirect parameter (age gate etc.), see _real_extract.
    _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
    # Listed in order of quality
    _available_formats = ['38', '37', '46', '22', '45', '35', '44', '34', '18', '43', '6', '5', '36', '17', '13',
                          # Apple HTTP Live Streaming
                          '96', '95', '94', '93', '92', '132', '151',
                          '85', '84', '102', '83', '101', '82', '100',
                          '138', '137', '248', '136', '247', '135', '246',
                          '245', '244', '134', '243', '133', '242', '160',
                          '141', '172', '140', '171', '139',
    # Same itags as above, reordered so that free (WebM) containers rank first.
    _available_formats_prefer_free = ['38', '46', '37', '45', '22', '44', '35', '43', '34', '18', '6', '5', '36', '17', '13',
                          # Apple HTTP Live Streaming
                          '96', '95', '94', '93', '92', '132', '151',
                          '85', '102', '84', '101', '83', '100', '82',
                          '138', '248', '137', '247', '136', '246', '245',
                          '244', '135', '243', '134', '242', '133', '160',
                          '172', '141', '171', '140', '139',
    # Container name -> itags in descending quality; lets -f accept 'mp4' etc.
    # (used by _get_video_url_list).
    _video_formats_map = {
        'flv': ['35', '34', '6', '5'],
        '3gp': ['36', '17', '13'],
        'mp4': ['38', '37', '22', '18'],
        'webm': ['46', '45', '44', '43'],
    # itag -> file extension (entries not visible in this copy).
    _video_extensions = {
        # Apple HTTP Live Streaming
    # itag -> display dimensions string, used by _print_formats.
    _video_dimensions = {
    # Test definitions consumed by the test suite.
        u"url": u"http://www.youtube.com/watch?v=BaW_jenozKc",
        u"file": u"BaW_jenozKc.mp4",
        u"title": u"youtube-dl test video \"'/\\ä↭𝕐",
        u"uploader": u"Philipp Hagemeister",
        u"uploader_id": u"phihag",
        u"upload_date": u"20121002",
        u"description": u"test chars: \"'/\\ä↭𝕐\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de ."
        u"url": u"http://www.youtube.com/watch?v=1ltcDfZMA3U",
        u"file": u"1ltcDfZMA3U.flv",
        u"note": u"Test VEVO video (#897)",
        u"upload_date": u"20070518",
        u"title": u"Maps - It Will Find You",
        u"description": u"Music video by Maps performing It Will Find You.",
        u"uploader": u"MuteUSA",
        u"uploader_id": u"MuteUSA"
        u"url": u"http://www.youtube.com/watch?v=UxxajLWwzqY",
        u"file": u"UxxajLWwzqY.mp4",
        u"note": u"Test generic use_cipher_signature video (#897)",
        u"upload_date": u"20120506",
        u"title": u"Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]",
        u"description": u"md5:3e2666e0a55044490499ea45fe9037b7",
        u"uploader": u"Icona Pop",
        u"uploader_id": u"IconaPop"
        u"url": u"https://www.youtube.com/watch?v=07FYdnEawAQ",
        u"file": u"07FYdnEawAQ.mp4",
        u"note": u"Test VEVO video with age protection (#956)",
        u"upload_date": u"20130703",
        u"title": u"Justin Timberlake - Tunnel Vision (Explicit)",
        u"description": u"md5:64249768eec3bc4276236606ea996373",
        u"uploader": u"justintimberlakeVEVO",
        u"uploader_id": u"justintimberlakeVEVO"
        u'url': u'https://www.youtube.com/watch?v=TGi3HqYrWHE',
        u'file': u'TGi3HqYrWHE.mp4',
        u'note': u'm3u8 video',
        u'title': u'Triathlon - Men - London 2012 Olympic Games',
        u'description': u'- Men - TR02 - Triathlon - 07 August 2012 - London 2012 Olympic Games',
        u'uploader': u'olympic',
        u'upload_date': u'20120807',
        u'uploader_id': u'olympic',
        u'skip_download': True,
def suitable(cls, url):
    """Return True when this extractor should handle *url*.

    Playlist URLs are explicitly excluded so that YoutubePlaylistIE
    gets first pick on them.
    """
    if YoutubePlaylistIE.suitable(url):
        return False
    return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
def __init__(self, *args, **kwargs):
    """Initialize the extractor; _player_cache memoizes the signature
    function extracted for each player URL (see _decrypt_signature)."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    self._player_cache = dict()
def report_video_webpage_download(self, video_id):
    """Notify the user that the video webpage is being fetched."""
    message = u'%s: Downloading video webpage' % video_id
    self.to_screen(message)
def report_video_info_webpage_download(self, video_id):
    """Notify the user that the video info webpage is being fetched."""
    message = u'%s: Downloading video info webpage' % video_id
    self.to_screen(message)
def report_information_extraction(self, video_id):
    """Notify the user that metadata extraction has started."""
    message = u'%s: Extracting video information' % video_id
    self.to_screen(message)
def report_unavailable_format(self, video_id, format):
    """Notify the user that the requested format is not available."""
    message = u'%s: Format %s not available' % (video_id, format)
    self.to_screen(message)
def report_rtmp_download(self):
    """Tell the user that this download will go over RTMP."""
    self.to_screen(u'RTMP download detected')
def _extract_signature_function(self, video_id, player_url, slen):
    """Build a signature-decryption function for the given player.

    The player type (js/swf) and id are parsed out of player_url; the
    resulting index permutation is cached on disk keyed by
    (type, id, signature length) so the player is only downloaded once.
    Returns a callable mapping a scrambled signature string of length
    *slen* to the decrypted one.
    """
    id_m = re.match(r'.*-(?P<id>[a-zA-Z0-9_-]+)\.(?P<ext>[a-z]+)$',
    player_type = id_m.group('ext')
    player_id = id_m.group('id')

    # Read from filesystem cache
    func_id = '%s_%s_%d' % (player_type, player_id, slen)
    # Guard against path-traversal characters sneaking into the filename.
    assert os.path.basename(func_id) == func_id
    cache_dir = self._downloader.params.get('cachedir',
                                            u'~/.youtube-dl/cache')
    # The sentinel value u'NONE' disables the cache entirely.
    cache_enabled = cache_dir != u'NONE'
    cache_fn = os.path.join(os.path.expanduser(cache_dir),
        with io.open(cache_fn, 'r', encoding='utf-8') as cachef:
            # The cached spec is simply the list of source indices, so the
            # function is a fixed reordering of the input characters.
            cache_spec = json.load(cachef)
            return lambda s: u''.join(s[i] for i in cache_spec)
        pass  # No cache available

    # Cache miss: download the player and parse the decipher routine out of it.
    if player_type == 'js':
        code = self._download_webpage(
            player_url, video_id,
            note=u'Downloading %s player %s' % (player_type, player_id),
            errnote=u'Download of %s failed' % player_url)
        res = self._parse_sig_js(code)
    elif player_type == 'swf':
        urlh = self._request_webpage(
            player_url, video_id,
            note=u'Downloading %s player %s' % (player_type, player_id),
            errnote=u'Download of %s failed' % player_url)
        res = self._parse_sig_swf(code)
        assert False, 'Invalid player type %r' % player_type

    # Derive the index permutation by running the function on the identity
    # string chr(0)..chr(slen-1), then persist it. Cache-write failures are
    # only warnings — the extracted function is still returned.
    cache_res = res(map(compat_chr, range(slen)))
    cache_spec = [ord(c) for c in cache_res]
        os.makedirs(os.path.dirname(cache_fn))
    except OSError as ose:
        if ose.errno != errno.EEXIST:
    write_json_file(cache_spec, cache_fn)
        tb = traceback.format_exc()
        self._downloader.report_warning(
            u'Writing cache to %r failed: %s' % (cache_fn, tb))
def _print_sig_code(self, func, slen):
    """Print *func* (a signature function for length *slen*) as Python
    source, expressing the permutation compactly as slices where runs of
    consecutive indices allow it (--youtube-print-sig-code support)."""
    def gen_sig_code(idxs):
        def _genslice(start, end, step):
            # Render s[start:end+step:step], omitting defaults (0 start, 1 step).
            starts = u'' if start == 0 else str(start)
            ends = u':%d' % (end+step)
            steps = u'' if step == 1 else (':%d' % step)
            return u's[%s%s%s]' % (starts, ends, steps)

        start = '(Never used)'  # Quelch pyflakes warnings - start will be
                                # set as soon as step is set
        for i, prev in zip(idxs[1:], idxs[:-1]):
            # A change of stride ends the current run: emit it as a slice.
            yield _genslice(start, prev, step)
            # Stride of +/-1 starts (or continues) a sliceable run.
            if i - prev in [-1, 1]:
            # Isolated index: emit a plain single-character lookup.
            yield u's[%d]' % prev
            yield _genslice(start, i, step)

    # Recover the permutation by applying func to the identity string.
    cache_res = func(map(compat_chr, range(slen)))
    cache_spec = [ord(c) for c in cache_res]
    expr_code = u' + '.join(gen_sig_code(cache_spec))
    code = u'if len(s) == %d:\n return %s\n' % (slen, expr_code)
    self.to_screen(u'Extracted signature function:\n' + code)
def _parse_sig_js(self, jscode):
    """Extract the signature function from a JS player by interpreting
    the small subset of JavaScript that the decipher code uses.

    Returns a callable mapping a signature string to its decrypted form.
    """
    funcname = self._search_regex(
        r'signature=([a-zA-Z]+)', jscode,
        u'Initial JS player signature function name')

        # Single lowercase letters are treated as variable "slots".
        return string.lowercase.index(varname)

    def interpret_statement(stmt, local_vars, allow_recursion=20):
        # Execute one JS statement; allow_recursion bounds nested evaluation.
        if allow_recursion < 0:
            raise ExtractorError(u'Recursion limit reached')
        if stmt.startswith(u'var '):
            stmt = stmt[len(u'var '):]
        # Assignment: <out> = expr  or  <out>[index] = expr
        ass_m = re.match(r'^(?P<out>[a-z]+)(?:\[(?P<index>[^\]]+)\])?' +
                         r'=(?P<expr>.*)$', stmt)
            if ass_m.groupdict().get('index'):
                # Indexed assignment into a local list.
                lvar = local_vars[ass_m.group('out')]
                idx = interpret_expression(ass_m.group('index'),
                                           local_vars, allow_recursion)
                assert isinstance(idx, int)
                expr = ass_m.group('expr')
                # Plain assignment to a local name.
                local_vars[ass_m.group('out')] = val
            expr = ass_m.group('expr')
        elif stmt.startswith(u'return '):
            expr = stmt[len(u'return '):]
            raise ExtractorError(
                u'Cannot determine left side of statement in %r' % stmt)

        v = interpret_expression(expr, local_vars, allow_recursion)

    def interpret_expression(expr, local_vars, allow_recursion):
        # Bare local name.
            return local_vars[expr]

        # Member access: split/join/length/reverse/slice on a local value.
        m = re.match(r'^(?P<in>[a-z]+)\.(?P<member>.*)$', expr)
            member = m.group('member')
            val = local_vars[m.group('in')]
            if member == 'split("")':
            if member == 'join("")':
            if member == 'length':
            if member == 'reverse()':
            slice_m = re.match(r'slice\((?P<idx>.*)\)', member)
                idx = interpret_expression(
                    slice_m.group('idx'), local_vars, allow_recursion-1)

        # Indexing: name[idx]
            r'^(?P<in>[a-z]+)\[(?P<idx>.+)\]$', expr)
            val = local_vars[m.group('in')]
            idx = interpret_expression(m.group('idx'), local_vars,

        # Binary modulo: a % b (the only binary operator the players use here).
        m = re.match(r'^(?P<a>.+?)(?P<op>[%])(?P<b>.+?)$', expr)
            a = interpret_expression(m.group('a'),
                                     local_vars, allow_recursion)
            b = interpret_expression(m.group('b'),
                                     local_vars, allow_recursion)

        # Function call: fname(args) — functions are extracted lazily.
            r'^(?P<func>[a-zA-Z]+)\((?P<args>[a-z0-9,]+)\)$', expr)
            fname = m.group('func')
            if fname not in functions:
                functions[fname] = extract_function(fname)
            argvals = [int(v) if v.isdigit() else local_vars[v]
                       for v in m.group('args').split(',')]
            return functions[fname](argvals)
        raise ExtractorError(u'Unsupported JS expression %r' % expr)

    def extract_function(funcname):
        # Locate "function <name>(args){body}" and wrap it as a Python callable
        # that runs each ';'-separated statement via interpret_statement.
            r'function ' + re.escape(funcname) +
            r'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}',
        argnames = func_m.group('args').split(',')

            local_vars = dict(zip(argnames, args))
            for stmt in func_m.group('code').split(';'):
                res = interpret_statement(stmt, local_vars)

    initial_function = extract_function(funcname)
    # The extracted function takes an argument list; adapt to a 1-arg callable.
    return lambda s: initial_function([s])
def _parse_sig_swf(self, file_contents):
    """Extract the 'decipher' routine from an SWF player by parsing the
    ABC (AVM2 / ActionScript 3 bytecode) block and interpreting just
    enough opcodes to run it in Python.

    Returns a callable mapping a scrambled signature string to the
    decrypted one.
    """
    if file_contents[1:3] != b'WS':
        raise ExtractorError(
            u'Not an SWF file; header is %r' % file_contents[:3])
    # A leading 'C' marks a zlib-compressed SWF body (after the 8-byte header).
    if file_contents[:1] == b'C':
        content = zlib.decompress(file_contents[8:])
        raise NotImplementedError(u'Unsupported compression format %r' %

    def extract_tags(content):
        # Walk the SWF tag stream: each tag has a 16-bit code/length header;
        # a short length of 0x3f means a 32-bit extended length follows.
        while pos < len(content):
            header16 = struct.unpack('<H', content[pos:pos+2])[0]
            tag_code = header16 >> 6
            tag_len = header16 & 0x3f
            tag_len = struct.unpack('<I', content[pos:pos+4])[0]
            assert pos+tag_len <= len(content)
            yield (tag_code, content[pos:pos+tag_len])

        for tag_code, tag in extract_tags(content)
    # Skip flags + NUL-terminated name at the start of the DoABC tag body.
    p = code_tag.index(b'\0', 4) + 1
    code_reader = io.BytesIO(code_tag[p:])

    # Parse ABC (AVM2 ByteCode)
    def read_int(reader=None):
        # Variable-length integer: 7 payload bits per byte, high bit = continue.
            b = struct.unpack('<B', buf)[0]
            res = res | ((b & 0x7f) << shift)

    def u30(reader=None):
        # u30: same wire format, but the top nibble must be clear.
        res = read_int(reader)
        assert res & 0xf0000000 == 0

    def s32(reader=None):
        # Signed 32-bit variant: sign-extend from bit 31.
        if v & 0x80000000 != 0:
            v = - ((v ^ 0xffffffff) + 1)

    def read_string(reader=None):
        # Length-prefixed UTF-8 string.
        resb = reader.read(slen)
        assert len(resb) == slen
        return resb.decode('utf-8')

    def read_bytes(count, reader=None):
        resb = reader.read(count)
        assert len(resb) == count

    def read_byte(reader=None):
        resb = read_bytes(1, reader=reader)
        res = struct.unpack('<B', resb)[0]

    # minor_version + major_version
    # Constant pools (note: ABC pools are 1-based, hence range(1, count)).
    for _c in range(1, int_count):
    for _c in range(1, uint_count):
    read_bytes((double_count-1) * 8)
    constant_strings = [u'']
    for _c in range(1, string_count):
        constant_strings.append(s)
    namespace_count = u30()
    for _c in range(1, namespace_count):
    for _c in range(1, ns_set_count):
        for _c2 in range(count):
    multiname_count = u30()
    # Number of trailing u30 fields to skip per multiname kind.
        0x0e: 2,  # MultinameA
        0x1b: 1,  # MultinameL
        0x1c: 1,  # MultinameLA
    for _c in range(1, multiname_count):
        assert kind in MULTINAME_SIZES, u'Invalid multiname kind %r' % kind
            u30()  # namespace_idx
            multinames.append(constant_strings[name_idx])
            # Kinds we don't resolve get a placeholder so indices still line up.
            multinames.append('[MULTINAME kind: %d]' % kind)
            for _c2 in range(MULTINAME_SIZES[kind]):

    # method_info entries: only the NEED_ARGUMENTS / NEED_REST flags are kept.
    MethodInfo = collections.namedtuple(
        ['NEED_ARGUMENTS', 'NEED_REST'])
    for method_id in range(method_count):
        for _ in range(param_count):
        u30()  # name index (always 0 for youtube)
        if flags & 0x08 != 0:
            for c in range(option_count):
        if flags & 0x80 != 0:
            # Param names present
            for _ in range(param_count):
        mi = MethodInfo(flags & 0x01 != 0, flags & 0x04 != 0)
        method_infos.append(mi)

    # Metadata section (parsed only to advance the reader).
    metadata_count = u30()
    for _c in range(metadata_count):
        for _c2 in range(item_count):

    def parse_traits_info():
        # Parse a single trait, recording method traits as name -> method idx.
        trait_name_idx = u30()
        kind_full = read_byte()
        kind = kind_full & 0x0f
        attrs = kind_full >> 4
        if kind in [0x00, 0x06]:  # Slot or Const
            u30()  # type_name_idx
        elif kind in [0x01, 0x02, 0x03]:  # Method / Getter / Setter
            methods[multinames[trait_name_idx]] = method_idx
        elif kind == 0x04:  # Class
        elif kind == 0x05:  # Function
            methods[function_idx] = multinames[trait_name_idx]
            raise ExtractorError(u'Unsupported trait kind %d' % kind)

        if attrs & 0x4 != 0:  # Metadata present
            metadata_count = u30()
            for _c3 in range(metadata_count):
                u30()  # metadata index

    # Locate the class that carries the signature-decipher code.
    TARGET_CLASSNAME = u'SignatureDecipher'
    searched_idx = multinames.index(TARGET_CLASSNAME)
    searched_class_id = None
    for class_id in range(class_count):
        if name_idx == searched_idx:
            # We found the class we're looking for!
            searched_class_id = class_id
        u30()  # super_name idx
        if flags & 0x08 != 0:  # Protected namespace is present
            u30()  # protected_ns_idx
        for _c2 in range(intrf_count):
        for _c2 in range(trait_count):
    if searched_class_id is None:
        raise ExtractorError(u'Target class %r not found' %

    # Collect the target class's method names and their method-info indices.
    for class_id in range(class_count):
        for _c2 in range(trait_count):
            trait_methods = parse_traits_info()
            if class_id == searched_class_id:
                method_names.update(trait_methods.items())
                method_idxs.update(dict(
                    for name, idx in trait_methods.items()))

    # Scripts section (parsed only to advance the reader).
    for _c in range(script_count):
        for _c2 in range(trait_count):

    # Method bodies: keep only the bytecode of the methods we care about.
    method_body_count = u30()
    Method = collections.namedtuple('Method', ['code', 'local_count'])
    for _c in range(method_body_count):
        u30()  # init_scope_depth
        u30()  # max_scope_depth
        code = read_bytes(code_length)
        if method_idx in method_idxs:
            m = Method(code, local_count)
            methods[method_idxs[method_idx]] = m
        exception_count = u30()
        for _c2 in range(exception_count):
        for _c2 in range(trait_count):

    # Sanity checks: whole DoABC tag consumed, every wanted body found.
    assert p + code_reader.tell() == len(code_tag)
    assert len(methods) == len(method_idxs)

    method_pyfunctions = {}

    def extract_function(func_name):
        # Lazily (and memoized) turn one AVM2 method into a Python callable
        # by interpreting its bytecode with an explicit operand stack.
        if func_name in method_pyfunctions:
            return method_pyfunctions[func_name]
        if func_name not in methods:
            raise ExtractorError(u'Cannot find function %r' % func_name)
        m = methods[func_name]

            # Register 0 is 'this', then the call arguments, then locals.
            registers = ['(this)'] + list(args) + [None] * m.local_count
            coder = io.BytesIO(m.code)
                opcode = struct.unpack('!B', coder.read(1))[0]
                if opcode == 36:  # pushbyte
                    v = struct.unpack('!B', coder.read(1))[0]
                elif opcode == 44:  # pushstring
                    stack.append(constant_strings[idx])
                elif opcode == 48:  # pushscope
                    # We don't implement the scope register, so we'll just
                    # ignore the popped value
                elif opcode == 70:  # callproperty
                    mname = multinames[index]
                    arg_count = u30(coder)
                    # Arguments are pushed left-to-right, so popping reverses them.
                    args = list(reversed(
                        [stack.pop() for _ in range(arg_count)]))
                    if mname == u'split':
                        assert len(args) == 1
                        assert isinstance(args[0], compat_str)
                        assert isinstance(obj, compat_str)
                        res = obj.split(args[0])
                    elif mname == u'slice':
                        assert len(args) == 1
                        assert isinstance(args[0], int)
                        assert isinstance(obj, list)
                    elif mname == u'join':
                        assert len(args) == 1
                        assert isinstance(args[0], compat_str)
                        assert isinstance(obj, list)
                        res = args[0].join(obj)
                    elif mname in method_pyfunctions:
                        # Call into another method of the same class.
                        stack.append(method_pyfunctions[mname](args))
                        raise NotImplementedError(
                            u'Unsupported property %r on %r'
                elif opcode == 72:  # returnvalue
                elif opcode == 79:  # callpropvoid
                    mname = multinames[index]
                    arg_count = u30(coder)
                    args = list(reversed(
                        [stack.pop() for _ in range(arg_count)]))
                    if mname == u'reverse':
                        # In-place list reversal; no value is pushed back.
                        assert isinstance(obj, list)
                        raise NotImplementedError(
                            u'Unsupported (void) property %r on %r'
                elif opcode == 93:  # findpropstrict
                    mname = multinames[index]
                    res = extract_function(mname)
                elif opcode == 97:  # setproperty
                    assert isinstance(obj, list)
                    assert isinstance(idx, int)
                elif opcode == 98:  # getlocal
                    stack.append(registers[index])
                elif opcode == 99:  # setlocal
                    registers[index] = value
                elif opcode == 102:  # getproperty
                    pname = multinames[index]
                    if pname == u'length':
                        assert isinstance(obj, list)
                        stack.append(len(obj))
                    else:  # Assume attribute access
                        assert isinstance(idx, int)
                        assert isinstance(obj, list)
                        stack.append(obj[idx])
                elif opcode == 128:  # coerce
                elif opcode == 133:  # coerce_s
                    assert isinstance(stack[-1], (type(None), compat_str))
                elif opcode == 164:  # modulo
                    value2 = stack.pop()
                    value1 = stack.pop()
                    res = value1 % value2
                elif opcode == 208:  # getlocal_0
                    stack.append(registers[0])
                elif opcode == 209:  # getlocal_1
                    stack.append(registers[1])
                elif opcode == 210:  # getlocal_2
                    stack.append(registers[2])
                elif opcode == 211:  # getlocal_3
                    stack.append(registers[3])
                elif opcode == 214:  # setlocal_2
                    registers[2] = stack.pop()
                elif opcode == 215:  # setlocal_3
                    registers[3] = stack.pop()
                    raise NotImplementedError(
                        u'Unsupported opcode %d' % opcode)

        method_pyfunctions[func_name] = resfunc

    initial_function = extract_function(u'decipher')
    # The compiled function takes an argument list; adapt to a 1-arg callable.
    return lambda s: initial_function([s])
def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
    """Turn the encrypted s field into a working signature"""
    # Prefer automatic extraction from the actual player (memoized per
    # player URL in self._player_cache); on any failure fall back to the
    # hard-coded static algorithms below.
    if player_url is not None:
        if player_url not in self._player_cache:
            func = self._extract_signature_function(
                video_id, player_url, len(s)
            self._player_cache[player_url] = func
        func = self._player_cache[player_url]
        if self._downloader.params.get('youtube_print_sig_code'):
            self._print_sig_code(func, len(s))
        # Automatic extraction failed: warn (with traceback) and fall back.
        tb = traceback.format_exc()
        self._downloader.report_warning(
            u'Automatic signature extraction failed: ' + tb)
        self._downloader.report_warning(
            u'Warning: Falling back to static signature algorithm')
    return self._static_decrypt_signature(
        s, video_id, player_url, age_gate)
def _static_decrypt_signature(self, s, video_id, player_url, age_gate):
    """Decrypt *s* with a hard-coded permutation chosen by len(s).

    NOTE(review): the selecting conditions are not visible in this copy;
    presumably each return below is guarded by a len(s) comparison —
    confirm against the full file.
    """
    # The videos with age protection use another player, so the
    # algorithms can be different.
        return s[2:63] + s[82] + s[64:82] + s[63]
        return s[86:29:-1] + s[88] + s[28:5:-1]
        return s[25] + s[3:25] + s[0] + s[26:42] + s[79] + s[43:79] + s[91] + s[80:83]
        return s[84:27:-1] + s[86] + s[26:5:-1]
        return s[25] + s[3:25] + s[2] + s[26:40] + s[77] + s[41:77] + s[89] + s[78:81]
        return s[84:78:-1] + s[87] + s[77:60:-1] + s[0] + s[59:3:-1]
        return s[7:28] + s[87] + s[29:45] + s[55] + s[46:55] + s[2] + s[56:87] + s[28]
        return s[6:27] + s[4] + s[28:39] + s[27] + s[40:59] + s[2] + s[60:]
        return s[5:34] + s[0] + s[35:38] + s[3] + s[39:45] + s[38] + s[46:53] + s[73] + s[54:73] + s[85] + s[74:85] + s[53]
        return s[3:11] + s[0] + s[12:55] + s[84] + s[56:84]
        return s[81:36:-1] + s[0] + s[35:2:-1]
        return s[81:64:-1] + s[82] + s[63:52:-1] + s[45] + s[51:45:-1] + s[1] + s[44:1:-1] + s[0]
        return s[80:73:-1] + s[81] + s[72:54:-1] + s[2] + s[53:43:-1] + s[0] + s[42:2:-1] + s[43] + s[1] + s[54]
        return s[56] + s[79:56:-1] + s[41] + s[55:41:-1] + s[80] + s[40:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
        return s[1:19] + s[0] + s[20:68] + s[19] + s[69:80]
        return s[54] + s[77:54:-1] + s[39] + s[53:39:-1] + s[78] + s[38:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]

    # No permutation known for this signature length.
    raise ExtractorError(u'Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s)))
def _decrypt_signature_age_gate(self, s):
    # The videos with age protection use another player, so the algorithms
    return s[2:63] + s[82] + s[64:82] + s[63]
    # Fallback to the other algorithms
    # NOTE(review): _decrypt_signature is defined above as
    # (self, s, video_id, player_url, age_gate=False) — this call passes
    # only `s`, which would raise TypeError if ever reached; confirm this
    # path is dead or fix the call site.
    return self._decrypt_signature(s)
def _get_available_subtitles(self, video_id):
    """Return a dict mapping subtitle language codes to timedtext URLs.

    A download failure or an empty list only produces a warning (the
    video itself can still be downloaded without subtitles).
    """
    sub_list = self._download_webpage(
        'http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
        video_id, note=False)
    except ExtractorError as err:
        self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err))
    # Each track is advertised as name="..." ... lang_code="..." in the XML.
    lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list)
        params = compat_urllib_parse.urlencode({
            # Requested format comes from the --sub-format option.
            'fmt': self._downloader.params.get('subtitlesformat'),
        url = u'http://www.youtube.com/api/timedtext?' + params
        sub_lang_list[lang] = url
    if not sub_lang_list:
        self._downloader.report_warning(u'video doesn\'t have subtitles')
    return sub_lang_list
def _get_available_automatic_caption(self, video_id, webpage):
    """We need the webpage for getting the captions url, pass it as an
    argument to speed up the process."""
    sub_format = self._downloader.params.get('subtitlesformat')
    self.to_screen(u'%s: Looking for automatic captions' % video_id)
    # The caption endpoint and timestamp live in the embedded player config.
    mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
    err_msg = u'Couldn\'t find automatic captions for %s' % video_id
        self._downloader.report_warning(err_msg)
    player_config = json.loads(mobj.group(1))
        args = player_config[u'args']
        caption_url = args[u'ttsurl']
        timestamp = args[u'timestamp']
        # We get the available subtitles
        list_params = compat_urllib_parse.urlencode({
        list_url = caption_url + '&' + list_params
        list_page = self._download_webpage(list_url, video_id)
        caption_list = xml.etree.ElementTree.fromstring(list_page.encode('utf-8'))
        # Automatic captions are only available when the original track is
        # itself ASR (automatic speech recognition).
        original_lang_node = caption_list.find('track')
        if original_lang_node.attrib.get('kind') != 'asr' :
            self._downloader.report_warning(u'Video doesn\'t have automatic captions')
        original_lang = original_lang_node.attrib['lang_code']

        # Build one translated-caption URL per available target language.
        for lang_node in caption_list.findall('target'):
            sub_lang = lang_node.attrib['lang_code']
            params = compat_urllib_parse.urlencode({
                'lang': original_lang,
            sub_lang_list[sub_lang] = caption_url + '&' + params
        return sub_lang_list
    # An extractor error can be raise by the download process if there are
    # no automatic captions but there are subtitles
    except (KeyError, ExtractorError):
        self._downloader.report_warning(err_msg)
def _print_formats(self, formats):
    """Print each itag with its extension, dimensions and any special tag
    (--list-formats output)."""
    print('Available formats:')
        print('%s\t:\t%s\t[%s]%s' %(x, self._video_extensions.get(x, 'flv'),
                                    self._video_dimensions.get(x, '???'),
                                    ' ('+self._special_itags[x]+')' if x in self._special_itags else ''))
def _extract_id(self, url):
    """Return the 11-character video ID parsed out of *url* using
    _VALID_URL (group 2 of the pattern); raises on non-matching URLs."""
    mobj = re.match(self._VALID_URL, url, re.VERBOSE)
        raise ExtractorError(u'Invalid URL: %s' % url)
    video_id = mobj.group(2)
def _get_video_url_list(self, url_map):
    """
    Transform a dictionary in the format {itag:url} to a list of (itag, url)
    with the requested formats.
    """
    req_format = self._downloader.params.get('format', None)
    format_limit = self._downloader.params.get('format_limit', None)
    # Quality ordering depends on --prefer-free-formats.
    available_formats = self._available_formats_prefer_free if self._downloader.params.get('prefer_free_formats', False) else self._available_formats
    if format_limit is not None and format_limit in available_formats:
        # --max-quality: drop everything above the requested limit.
        format_list = available_formats[available_formats.index(format_limit):]
        format_list = available_formats
    existing_formats = [x for x in format_list if x in url_map]
    if len(existing_formats) == 0:
        raise ExtractorError(u'no known formats available for video')
    if self._downloader.params.get('listformats', None):
        self._print_formats(existing_formats)
    if req_format is None or req_format == 'best':
        video_url_list = [(existing_formats[0], url_map[existing_formats[0]])]  # Best quality
    elif req_format == 'worst':
        video_url_list = [(existing_formats[-1], url_map[existing_formats[-1]])]  # worst quality
    elif req_format in ('-1', 'all'):
        video_url_list = [(f, url_map[f]) for f in existing_formats]  # All formats
        # Specific formats. We pick the first in a slash-delimeted sequence.
        # Format can be specified as itag or 'mp4' or 'flv' etc. We pick the highest quality
        # available in the specified format. For example,
        # if '1/2/3/4' is requested and '2' and '4' are available, we pick '2'.
        # if '1/mp4/3/4' is requested and '1' and '5' (is a mp4) are available, we pick '1'.
        # if '1/mp4/3/4' is requested and '4' and '5' (is a mp4) are available, we pick '5'.
        req_formats = req_format.split('/')
        video_url_list = None
        for rf in req_formats:
                video_url_list = [(rf, url_map[rf])]
            # Container alias ('mp4', 'flv', ...): try its itags best-first.
            if rf in self._video_formats_map:
                for srf in self._video_formats_map[rf]:
                        video_url_list = [(srf, url_map[srf])]
        if video_url_list is None:
            raise ExtractorError(u'requested format not available')
    return video_url_list
def _extract_from_m3u8(self, manifest_url, video_id):
    """Build an {itag: url} map from an HLS (m3u8) master manifest; the
    itag is parsed out of each variant URL's itag/<n>/ path segment."""
    def _get_urls(_manifest):
        # Keep only non-empty, non-comment (#...) lines: the variant URLs.
        lines = _manifest.split('\n')
        urls = filter(lambda l: l and not l.startswith('#'),
    manifest = self._download_webpage(manifest_url, video_id, u'Downloading formats manifest')
    formats_urls = _get_urls(manifest)
    for format_url in formats_urls:
        itag = self._search_regex(r'itag/(\d+?)/', format_url, 'itag')
        url_map[itag] = format_url
1271 def _real_extract(self, url):
1272 if re.match(r'(?:https?://)?[^/]+/watch\?feature=[a-z_]+$', url):
1273 self._downloader.report_warning(u'Did you forget to quote the URL? Remember that & is a meta-character in most shells, so you want to put the URL in quotes, like youtube-dl \'http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc\' (or simply youtube-dl BaW_jenozKc ).')
1275 # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1276 mobj = re.search(self._NEXT_URL_RE, url)
1278 url = 'https://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/')
1279 video_id = self._extract_id(url)
1282 self.report_video_webpage_download(video_id)
1283 url = 'https://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
1284 request = compat_urllib_request.Request(url)
1286 video_webpage_bytes = compat_urllib_request.urlopen(request).read()
1287 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1288 raise ExtractorError(u'Unable to download video webpage: %s' % compat_str(err))
1290 video_webpage = video_webpage_bytes.decode('utf-8', 'ignore')
1292 # Attempt to extract SWF player URL
1293 mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
1294 if mobj is not None:
1295 player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1300 self.report_video_info_webpage_download(video_id)
1301 if re.search(r'player-age-gate-content">', video_webpage) is not None:
1302 self.report_age_confirmation()
1304 # We simulate the access to the video from www.youtube.com/v/{video_id}
1305 # this can be viewed without login into Youtube
1306 data = compat_urllib_parse.urlencode({'video_id': video_id,
1310 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1314 video_info_url = 'https://www.youtube.com/get_video_info?' + data
1315 video_info_webpage = self._download_webpage(video_info_url, video_id,
1317 errnote='unable to download video info webpage')
1318 video_info = compat_parse_qs(video_info_webpage)
1321 for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
1322 video_info_url = ('https://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
1323 % (video_id, el_type))
1324 video_info_webpage = self._download_webpage(video_info_url, video_id,
1326 errnote='unable to download video info webpage')
1327 video_info = compat_parse_qs(video_info_webpage)
1328 if 'token' in video_info:
1330 if 'token' not in video_info:
1331 if 'reason' in video_info:
1332 raise ExtractorError(u'YouTube said: %s' % video_info['reason'][0], expected=True)
1334 raise ExtractorError(u'"token" parameter not in video info for unknown reason')
1336 # Check for "rental" videos
1337 if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
1338 raise ExtractorError(u'"rental" videos not supported')
1340 # Start extracting information
1341 self.report_information_extraction(video_id)
1344 if 'author' not in video_info:
1345 raise ExtractorError(u'Unable to extract uploader name')
1346 video_uploader = compat_urllib_parse.unquote_plus(video_info['author'][0])
1349 video_uploader_id = None
1350 mobj = re.search(r'<link itemprop="url" href="http://www.youtube.com/(?:user|channel)/([^"]+)">', video_webpage)
1351 if mobj is not None:
1352 video_uploader_id = mobj.group(1)
1354 self._downloader.report_warning(u'unable to extract uploader nickname')
1357 if 'title' not in video_info:
1358 raise ExtractorError(u'Unable to extract video title')
1359 video_title = compat_urllib_parse.unquote_plus(video_info['title'][0])
1362 # We try first to get a high quality image:
1363 m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
1364 video_webpage, re.DOTALL)
1365 if m_thumb is not None:
1366 video_thumbnail = m_thumb.group(1)
1367 elif 'thumbnail_url' not in video_info:
1368 self._downloader.report_warning(u'unable to extract video thumbnail')
1369 video_thumbnail = ''
1370 else: # don't panic if we can't find it
1371 video_thumbnail = compat_urllib_parse.unquote_plus(video_info['thumbnail_url'][0])
1375 mobj = re.search(r'id="eow-date.*?>(.*?)</span>', video_webpage, re.DOTALL)
1376 if mobj is not None:
1377 upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
1378 upload_date = unified_strdate(upload_date)
1381 video_description = get_element_by_id("eow-description", video_webpage)
1382 if video_description:
1383 video_description = clean_html(video_description)
1385 fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
1387 video_description = unescapeHTML(fd_mobj.group(1))
1389 video_description = u''
1392 video_subtitles = self.extract_subtitles(video_id, video_webpage)
1394 if self._downloader.params.get('listsubtitles', False):
1395 self._list_available_subtitles(video_id, video_webpage)
1398 if 'length_seconds' not in video_info:
1399 self._downloader.report_warning(u'unable to extract video duration')
1402 video_duration = compat_urllib_parse.unquote_plus(video_info['length_seconds'][0])
1404 # Decide which formats to download
1407 mobj = re.search(r';ytplayer.config = ({.*?});', video_webpage)
1409 raise ValueError('Could not find vevo ID')
1410 info = json.loads(mobj.group(1))
1412 # Easy way to know if the 's' value is in url_encoded_fmt_stream_map
1413 # this signatures are encrypted
1414 m_s = re.search(r'[&,]s=', args['url_encoded_fmt_stream_map'])
1416 self.to_screen(u'%s: Encrypted signatures detected.' % video_id)
1417 video_info['url_encoded_fmt_stream_map'] = [args['url_encoded_fmt_stream_map']]
1418 m_s = re.search(r'[&,]s=', args.get('adaptive_fmts', u''))
1420 if 'url_encoded_fmt_stream_map' in video_info:
1421 video_info['url_encoded_fmt_stream_map'][0] += ',' + args['adaptive_fmts']
1423 video_info['url_encoded_fmt_stream_map'] = [args['adaptive_fmts']]
1424 elif 'adaptive_fmts' in video_info:
1425 if 'url_encoded_fmt_stream_map' in video_info:
1426 video_info['url_encoded_fmt_stream_map'][0] += ',' + video_info['adaptive_fmts'][0]
1428 video_info['url_encoded_fmt_stream_map'] = video_info['adaptive_fmts']
1432 if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1433 self.report_rtmp_download()
1434 video_url_list = [(None, video_info['conn'][0])]
1435 elif 'url_encoded_fmt_stream_map' in video_info and len(video_info['url_encoded_fmt_stream_map']) >= 1:
1436 if 'rtmpe%3Dyes' in video_info['url_encoded_fmt_stream_map'][0]:
1437 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
1439 for url_data_str in video_info['url_encoded_fmt_stream_map'][0].split(','):
1440 url_data = compat_parse_qs(url_data_str)
1441 if 'itag' in url_data and 'url' in url_data:
1442 url = url_data['url'][0]
1443 if 'sig' in url_data:
1444 url += '&signature=' + url_data['sig'][0]
1445 elif 's' in url_data:
1446 encrypted_sig = url_data['s'][0]
1447 if self._downloader.params.get('verbose'):
1449 player_version = self._search_regex(
1451 player_url if player_url else None,
1452 'flash player', fatal=False)
1453 player_desc = 'flash player %s' % player_version
1455 player_version = self._search_regex(
1456 r'html5player-(.+?)\.js', video_webpage,
1457 'html5 player', fatal=False)
1458 player_desc = u'html5 player %s' % player_version
1460 parts_sizes = u'.'.join(compat_str(len(part)) for part in encrypted_sig.split('.'))
1461 self.to_screen(u'encrypted signature length %d (%s), itag %s, %s' %
1462 (len(encrypted_sig), parts_sizes, url_data['itag'][0], player_desc))
1465 jsplayer_url_json = self._search_regex(
1466 r'"assets":.+?"js":\s*("[^"]+")',
1467 video_webpage, u'JS player URL')
1468 player_url = json.loads(jsplayer_url_json)
1470 signature = self._decrypt_signature(
1471 encrypted_sig, video_id, player_url, age_gate)
1472 url += '&signature=' + signature
1473 if 'ratebypass' not in url:
1474 url += '&ratebypass=yes'
1475 url_map[url_data['itag'][0]] = url
1476 video_url_list = self._get_video_url_list(url_map)
1477 if not video_url_list:
1479 elif video_info.get('hlsvp'):
1480 manifest_url = video_info['hlsvp'][0]
1481 url_map = self._extract_from_m3u8(manifest_url, video_id)
1482 video_url_list = self._get_video_url_list(url_map)
1483 if not video_url_list:
1487 raise ExtractorError(u'no conn or url_encoded_fmt_stream_map information found in video info')
1490 for format_param, video_real_url in video_url_list:
1492 video_extension = self._video_extensions.get(format_param, 'flv')
1494 video_format = '{0} - {1}{2}'.format(format_param if format_param else video_extension,
1495 self._video_dimensions.get(format_param, '???'),
1496 ' ('+self._special_itags[format_param]+')' if format_param in self._special_itags else '')
1500 'url': video_real_url,
1501 'uploader': video_uploader,
1502 'uploader_id': video_uploader_id,
1503 'upload_date': upload_date,
1504 'title': video_title,
1505 'ext': video_extension,
1506 'format': video_format,
1507 'thumbnail': video_thumbnail,
1508 'description': video_description,
1509 'player_url': player_url,
1510 'subtitles': video_subtitles,
1511 'duration': video_duration
class YoutubePlaylistIE(InfoExtractor):
    """Extracts all videos of a YouTube playlist via the GData API."""
    IE_DESC = u'YouTube.com playlists'
    # Verbose-mode regex (matched with re.VERBOSE below); the groups capture
    # the playlist id either from a full URL or as a bare (PL|EC|UU|FL)... id.
    _VALID_URL = r"""(?:
                        (?:course|view_play_list|my_playlists|artist|playlist|watch)
                        \? (?:.*?&)*? (?:p|a|list)=
                        ((?:PL|EC|UU|FL)?[0-9A-Za-z-_]{10,})
                        ((?:PL|EC|UU|FL)[0-9A-Za-z-_]{10,})
    # GData playlist feed: %s=playlist id, %i=page size, %i=1-based start index.
    _TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/playlists/%s?max-results=%i&start-index=%i&v=2&alt=json&safeSearch=none'
    IE_NAME = u'youtube:playlist'

    def suitable(cls, url):
        """Receives a URL and returns True if suitable for this IE."""
        # _VALID_URL is written in verbose mode, hence the re.VERBOSE flag.
        return re.match(cls._VALID_URL, url, re.VERBOSE) is not None

    def _real_extract(self, url):
        # Extract playlist id
        mobj = re.match(self._VALID_URL, url, re.VERBOSE)
        # NOTE(review): an 'if mobj is None:' guard looks intended before this
        # raise — confirm against the complete file.
        raise ExtractorError(u'Invalid URL: %s' % url)

        # Download playlist videos from API
        # Group 1 is set for URL-style matches, group 2 for bare playlist ids.
        playlist_id = mobj.group(1) or mobj.group(2)

        for page_num in itertools.count(1):
            # GData start-index is 1-based.
            start_index = self._MAX_RESULTS * (page_num - 1) + 1
            if start_index >= 1000:
                # The API rejects start indices of 1000 and above.
                self._downloader.report_warning(u'Max number of results reached')
            url = self._TEMPLATE_URL % (playlist_id, self._MAX_RESULTS, start_index)
            page = self._download_webpage(url, playlist_id, u'Downloading page #%s' % page_num)

            response = json.loads(page)
            except ValueError as err:
                raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))

            if 'feed' not in response:
                raise ExtractorError(u'Got a malformed response from YouTube API')
            playlist_title = response['feed']['title']['$t']
            if 'entry' not in response['feed']:
                # Number of videos is a multiple of self._MAX_RESULTS

            for entry in response['feed']['entry']:
                # yt$position is the 1-based position inside the playlist; the
                # collected (index, url) pairs are sorted on it further down.
                index = entry['yt$position']['$t']
                if 'media$group' in entry and 'yt$videoid' in entry['media$group']:
                        'https://www.youtube.com/watch?v=' + entry['media$group']['yt$videoid']['$t']

        # Restore playlist order, then drop the position index.
        videos = [v[1] for v in sorted(videos)]

        url_results = [self.url_result(vurl, 'Youtube') for vurl in videos]
        return [self.playlist_result(url_results, playlist_id, playlist_title)]
class YoutubeChannelIE(InfoExtractor):
    """Extracts all videos listed on a YouTube channel page."""
    IE_DESC = u'YouTube.com channels'
    _VALID_URL = r"^(?:https?://)?(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/([0-9A-Za-z_-]+)"
    # HTML page of the channel's video list: %s=channel id, %s=page number.
    _TEMPLATE_URL = 'http://www.youtube.com/channel/%s/videos?sort=da&flow=list&view=0&page=%s&gl=US&hl=en'
    # Marker present in a page when further pages can still be loaded.
    _MORE_PAGES_INDICATOR = 'yt-uix-load-more'
    # JSON ajax endpoint for subsequent pages: %s=page number, %s=channel id.
    _MORE_PAGES_URL = 'http://www.youtube.com/c4_browse_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s'
    IE_NAME = u'youtube:channel'

    def extract_videos_from_page(self, page):
        """Collect the video ids linked from a channel page."""
        for mobj in re.finditer(r'href="/watch\?v=([0-9A-Za-z_-]+)&?', page):
            # Deduplicate while preserving order of first appearance.
            if mobj.group(1) not in ids_in_page:
                ids_in_page.append(mobj.group(1))

    def _real_extract(self, url):
        # Extract channel id
        mobj = re.match(self._VALID_URL, url)
        # NOTE(review): an 'if mobj is None:' guard looks intended before this
        # raise — confirm against the complete file.
        raise ExtractorError(u'Invalid URL: %s' % url)

        # Download channel page
        channel_id = mobj.group(1)
        url = self._TEMPLATE_URL % (channel_id, pagenum)
        page = self._download_webpage(url, channel_id,
                                      u'Downloading page #%s' % pagenum)

        # Extract video identifiers
        ids_in_page = self.extract_videos_from_page(page)
        video_ids.extend(ids_in_page)

        # Download any subsequent channel pages using the json-based channel_ajax query
        if self._MORE_PAGES_INDICATOR in page:
            for pagenum in itertools.count(1):
                url = self._MORE_PAGES_URL % (pagenum, channel_id)
                page = self._download_webpage(url, channel_id,
                                              u'Downloading page #%s' % pagenum)
                # Ajax pages are JSON with the HTML embedded under 'content_html'.
                page = json.loads(page)

                ids_in_page = self.extract_videos_from_page(page['content_html'])
                video_ids.extend(ids_in_page)

                # Stop once the load-more widget offers no further pages.
                if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']:

        self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))

        urls = ['http://www.youtube.com/watch?v=%s' % id for id in video_ids]
        url_entries = [self.url_result(eurl, 'Youtube') for eurl in urls]
        return [self.playlist_result(url_entries, channel_id)]
class YoutubeUserIE(InfoExtractor):
    """Extracts the uploads of a YouTube user via the GData API."""
    IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?)|ytuser:)(?!feed/)([A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
    # Maximum number of entries the GData uploads feed returns per request.
    _GDATA_PAGE_SIZE = 50
    # Uploads feed: %s=username, %d=page size, %d=1-based start index.
    _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'
    IE_NAME = u'youtube:user'

    def suitable(cls, url):
        # Don't return True if the url can be extracted with other youtube
        # extractors; this regex is too permissive and would match them too.
        other_ies = iter(klass for (name, klass) in globals().items() if name.endswith('IE') and klass is not cls)
        if any(ie.suitable(url) for ie in other_ies): return False
        else: return super(YoutubeUserIE, cls).suitable(url)

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        # NOTE(review): an 'if mobj is None:' guard looks intended before this
        # raise — confirm against the complete file.
        raise ExtractorError(u'Invalid URL: %s' % url)

        username = mobj.group(1)

        # Download video ids using YouTube Data API. Result size per
        # query is limited (currently to 50 videos) so we need to query
        # page by page until there are no video ids - it means we got
        # all of them.

        for pagenum in itertools.count(0):
            # GData start-index is 1-based.
            start_index = pagenum * self._GDATA_PAGE_SIZE + 1

            gdata_url = self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index)
            page = self._download_webpage(gdata_url, username,
                                          u'Downloading video ids from %d to %d' % (start_index, start_index + self._GDATA_PAGE_SIZE))

            response = json.loads(page)
            except ValueError as err:
                raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))
            if 'entry' not in response['feed']:
                # Number of videos is a multiple of self._MAX_RESULTS

            # Extract video identifiers
            for entry in response['feed']['entry']:
                # The feed entry id ends with the video id after the last '/'.
                ids_in_page.append(entry['id']['$t'].split('/')[-1])
            video_ids.extend(ids_in_page)

            # A little optimization - if current page is not
            # "full", ie. does not contain PAGE_SIZE video ids then
            # we can assume that this page is the last one - there
            # are no more ids on further pages - no need to query
            # again.

            if len(ids_in_page) < self._GDATA_PAGE_SIZE:

        urls = ['http://www.youtube.com/watch?v=%s' % video_id for video_id in video_ids]
        url_results = [self.url_result(rurl, 'Youtube') for rurl in urls]
        return [self.playlist_result(url_results, playlist_title = username)]
class YoutubeSearchIE(SearchInfoExtractor):
    """Search extractor for the "ytsearch" keyword, backed by the GData API."""
    IE_DESC = u'YouTube.com searches'
    # GData search feed: %s=quoted query, %i=1-based start index, 50 per page.
    _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc'
    IE_NAME = u'youtube:search'
    _SEARCH_KEY = 'ytsearch'

    def report_download_page(self, query, pagenum):
        """Report attempt to download search page with given number."""
        self._downloader.to_screen(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        # Fetch 50-result pages until the accumulated ids cover the limit.
        while (50 * pagenum) < limit:
            self.report_download_page(query, pagenum+1)
            result_url = self._API_URL % (compat_urllib_parse.quote_plus(query), (50*pagenum)+1)
            request = compat_urllib_request.Request(result_url)
            data = compat_urllib_request.urlopen(request).read().decode('utf-8')
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                raise ExtractorError(u'Unable to download API page: %s' % compat_str(err))
            # jsonc responses wrap the payload in a top-level 'data' object.
            api_response = json.loads(data)['data']

            if not 'items' in api_response:
                raise ExtractorError(u'[youtube] No video results')

            new_ids = list(video['id'] for video in api_response['items'])
            video_ids += new_ids

            # Never ask for more results than the API reports to exist.
            limit = min(n, api_response['totalItems'])

        # Trim any overshoot from the last full page.
        if len(video_ids) > n:
            video_ids = video_ids[:n]
        videos = [self.url_result('http://www.youtube.com/watch?v=%s' % id, 'Youtube') for id in video_ids]
        return self.playlist_result(videos, query)
class YoutubeShowIE(InfoExtractor):
    """Turns a YouTube show page into the playlists of its seasons."""
    IE_DESC = u'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://www\.youtube\.com/show/(.*)'
    IE_NAME = u'youtube:show'

    def _real_extract(self, url):
        show_name = re.match(self._VALID_URL, url).group(1)
        webpage = self._download_webpage(url, show_name, u'Downloading show webpage')
        # Each season of the show is published as a separate playlist.
        season_matches = list(re.finditer(r'href="(/playlist\?list=.*?)"', webpage))
        self.to_screen(u'%s: Found %s seasons' % (show_name, len(season_matches)))
        results = []
        for season in season_matches:
            playlist_url = 'https://www.youtube.com' + season.group(1)
            results.append(self.url_result(playlist_url, 'YoutubePlaylist'))
        return results
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Base class for extractors that fetch info from
    http://www.youtube.com/feed_ajax
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
    """
    # Feed pages are only available to a logged-in session.
    _LOGIN_REQUIRED = True

    # use action_load_personal_feed instead of action_load_system_feed
    _PERSONAL_FEED = False

    def _FEED_TEMPLATE(self):
        # Builds the ajax URL, leaving a single %s placeholder for paging.
        action = 'action_load_system_feed'
        if self._PERSONAL_FEED:
            action = 'action_load_personal_feed'
        return 'http://www.youtube.com/feed_ajax?%s=1&feed_name=%s&paging=%%s' % (action, self._FEED_NAME)

        # NOTE(review): this return appears to belong to an IE_NAME property
        # whose 'def' line is not visible here — confirm against the full file.
        return u'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        # NOTE(review): body not visible here (presumably performs login,
        # given _LOGIN_REQUIRED above) — confirm against the full file.

    def _real_extract(self, url):
        # The step argument is available only in 2.7 or higher
        for i in itertools.count(0):
            paging = i*self._PAGING_STEP
            info = self._download_webpage(self._FEED_TEMPLATE % paging,
                                          u'%s feed' % self._FEED_NAME,
                                          u'Downloading page %s' % i)
            info = json.loads(info)
            feed_html = info['feed_html']
            # Pull video ids out of the rendered feed HTML, deduplicated
            # while keeping their original order.
            m_ids = re.finditer(r'"/watch\?v=(.*?)["&]', feed_html)
            ids = orderedSet(m.group(1) for m in m_ids)
            feed_entries.extend(self.url_result(id, 'Youtube') for id in ids)
            # A null 'paging' value marks the final page of the feed.
            if info['paging'] is None:
        return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE)
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the logged-in user's subscriptions feed."""
    # Fixed the missing space before '(requires authentication)' so the
    # user-visible description matches the sibling feed extractors
    # (YoutubeRecommendedIE, YoutubeWatchLaterIE).
    IE_DESC = u'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = u'Youtube Subscriptions'
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the logged-in user's recommended videos."""
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = u'Youtube Recommended videos'
    IE_DESC = u'YouTube.com recommended videos, "ytrec" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?'
class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the user's "Watch Later" list."""
    _FEED_NAME = 'watch_later'
    _PLAYLIST_TITLE = u'Youtube Watch Later'
    # Watch Later is account-specific, so the personal-feed ajax action is used.
    _PERSONAL_FEED = True
    IE_DESC = u'Youtube watch later list, "ytwatchlater" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/watch_later|:ytwatchlater'
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Resolves the logged-in user's favourites to their backing playlist."""
    IE_NAME = u'youtube:favorites'
    IE_DESC = u'YouTube.com favourite videos, "ytfav" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
    _LOGIN_REQUIRED = True

    def _real_extract(self, url):
        # The favourites page embeds the id of the underlying playlist;
        # delegate the actual extraction to the playlist extractor.
        favourites_page = self._download_webpage('https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        playlist_id = self._search_regex(r'list=(.+?)["&]', favourites_page, u'favourites playlist id')
        return self.url_result(playlist_id, 'YoutubePlaylist')