[youtube] Add hlsvp to the error message if it can't be found and remove the live...
[youtube-dl] / youtube_dl / extractor / youtube.py
1 # coding: utf-8
2
3 import collections
4 import errno
5 import io
6 import itertools
7 import json
8 import os.path
9 import re
10 import socket
11 import string
12 import struct
13 import traceback
14 import xml.etree.ElementTree
15 import zlib
16
17 from .common import InfoExtractor, SearchInfoExtractor
18 from .subtitles import SubtitlesInfoExtractor
19 from ..utils import (
20     compat_chr,
21     compat_http_client,
22     compat_parse_qs,
23     compat_urllib_error,
24     compat_urllib_parse,
25     compat_urllib_request,
26     compat_str,
27
28     clean_html,
29     get_element_by_id,
30     ExtractorError,
31     unescapeHTML,
32     unified_strdate,
33     orderedSet,
34     write_json_file,
35 )
36
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _LANG_URL = r'https://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
    _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    def report_lang(self):
        """Report attempt to set language."""
        self.to_screen(u'Setting language')

    def _set_language(self):
        """Request the language-selection URL so YouTube serves English pages.

        Returns True on success, False (after a warning) on network errors.
        """
        request = compat_urllib_request.Request(self._LANG_URL)
        try:
            self.report_lang()
            compat_urllib_request.urlopen(request).read()
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self._downloader.report_warning(u'unable to set language: %s' % compat_str(err))
            return False
        return True

    def _login(self):
        """Log in with the configured credentials.

        Returns True if login succeeded, False otherwise (failures are
        reported as warnings).  Raises ExtractorError when _LOGIN_REQUIRED
        is set but no credentials are available.
        """
        (username, password) = self._get_login_info()
        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED:
                raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            return False

        request = compat_urllib_request.Request(self._LOGIN_URL)
        try:
            login_page = compat_urllib_request.urlopen(request).read().decode('utf-8')
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self._downloader.report_warning(u'unable to fetch login page: %s' % compat_str(err))
            return False

        # The login page embeds two anti-forgery tokens (GALX, dsh) that must
        # be echoed back in the form POST below.
        galx = None
        dsh = None
        match = re.search(re.compile(r'<input.+?name="GALX".+?value="(.+?)"', re.DOTALL), login_page)
        if match:
            galx = match.group(1)
        match = re.search(re.compile(r'<input.+?name="dsh".+?value="(.+?)"', re.DOTALL), login_page)
        if match:
            dsh = match.group(1)
        if galx is None or dsh is None:
            # A missing token used to crash with AttributeError when encoding
            # None below; fail gracefully instead.
            self._downloader.report_warning(
                u'unable to extract login form tokens (GALX/dsh) - maybe the page layout changed')
            return False

        # Log in
        login_form_strs = {
                u'continue': u'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
                u'Email': username,
                u'GALX': galx,
                u'Passwd': password,
                u'PersistentCookie': u'yes',
                u'_utf8': u'霱',
                u'bgresponse': u'js_disabled',
                u'checkConnection': u'',
                u'checkedDomains': u'youtube',
                u'dnConn': u'',
                u'dsh': dsh,
                u'pstMsg': u'0',
                u'rmShown': u'1',
                u'secTok': u'',
                u'signIn': u'Sign in',
                u'timeStmp': u'',
                u'service': u'youtube',
                u'uilel': u'3',
                u'hl': u'en_US',
        }
        # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
        # chokes on unicode
        login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k,v in login_form_strs.items())
        login_data = compat_urllib_parse.urlencode(login_form).encode('ascii')
        request = compat_urllib_request.Request(self._LOGIN_URL, login_data)
        try:
            self.report_login()
            login_results = compat_urllib_request.urlopen(request).read().decode('utf-8')
            # Seeing the login form again means the credentials were refused.
            if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
                self._downloader.report_warning(u'unable to log in: bad username or password')
                return False
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self._downloader.report_warning(u'unable to log in: %s' % compat_str(err))
            return False
        return True

    def _confirm_age(self):
        """Confirm the age gate; raises ExtractorError on network failure."""
        age_form = {
                'next_url':     '/',
                'action_confirm':   'Confirm',
                }
        request = compat_urllib_request.Request(self._AGE_URL, compat_urllib_parse.urlencode(age_form))
        try:
            self.report_age_confirmation()
            compat_urllib_request.urlopen(request).read().decode('utf-8')
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            raise ExtractorError(u'Unable to confirm age: %s' % compat_str(err))
        return True

    def _real_initialize(self):
        """Set language, log in (when credentials exist) and pass the age gate."""
        if self._downloader is None:
            return
        if not self._set_language():
            return
        if not self._login():
            return
        self._confirm_age()
143
144
class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
    """Information extractor for youtube.com watch pages."""
    IE_DESC = u'YouTube.com'
    # Verbose regex: matches the many URL shapes that carry an 11-char video
    # id, and also accepts a naked id on its own.
    _VALID_URL = r"""^
                     (
                         (?:https?://)?                                       # http(s):// (optional)
                         (?:(?:(?:(?:\w+\.)?youtube(?:-nocookie)?\.com/|
                            tube\.majestyc\.net/|
                            youtube\.googleapis\.com/)                        # the various hostnames, with wildcard subdomains
                         (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
                         (?:                                                  # the various things that can precede the ID:
                             (?:(?:v|embed|e)/)                               # v/ or embed/ or e/
                             |(?:                                             # or the v= param in all its forms
                                 (?:(?:watch|movie)(?:_popup)?(?:\.php)?)?    # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
                                 (?:\?|\#!?)                                  # the params delimiter ? or # or #!
                                 (?:.*?&)?                                    # any other preceding param (like /?s=tuff&v=xxxx)
                                 v=
                             )
                         ))
                         |youtu\.be/                                          # just youtu.be/xxxx
                         )
                     )?                                                       # all until now is optional -> you can pass the naked ID
                     ([0-9A-Za-z_-]{11})                                      # here is it! the YouTube video ID
                     (?(1).+)?                                                # if we found the ID, everything can follow
                     $"""
    # Captures the target of a next_url redirect parameter.
    _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
    # Listed in order of quality
    _available_formats = ['38', '37', '46', '22', '45', '35', '44', '34', '18', '43', '6', '5', '36', '17', '13',
                          # Apple HTTP Live Streaming
                          '96', '95', '94', '93', '92', '132', '151',
                          # 3D
                          '85', '84', '102', '83', '101', '82', '100',
                          # Dash video
                          '138', '137', '248', '136', '247', '135', '246',
                          '245', '244', '134', '243', '133', '242', '160',
                          # Dash audio
                          '141', '172', '140', '171', '139',
                          ]
    # Same itags, but free (WebM) containers ranked ahead at equal quality.
    _available_formats_prefer_free = ['38', '46', '37', '45', '22', '44', '35', '43', '34', '18', '6', '5', '36', '17', '13',
                                      # Apple HTTP Live Streaming
                                      '96', '95', '94', '93', '92', '132', '151',
                                      # 3D
                                      '85', '102', '84', '101', '83', '100', '82',
                                      # Dash video
                                      '138', '248', '137', '247', '136', '246', '245',
                                      '244', '135', '243', '134', '242', '133', '160',
                                      # Dash audio
                                      '172', '141', '171', '140', '139',
                                      ]
    # Container/extension -> itags using it, best first.
    _video_formats_map = {
        'flv': ['35', '34', '6', '5'],
        '3gp': ['36', '17', '13'],
        'mp4': ['38', '37', '22', '18'],
        'webm': ['46', '45', '44', '43'],
    }
    # itag -> file extension.
    _video_extensions = {
        '13': '3gp',
        '17': '3gp',
        '18': 'mp4',
        '22': 'mp4',
        '36': '3gp',
        '37': 'mp4',
        '38': 'mp4',
        '43': 'webm',
        '44': 'webm',
        '45': 'webm',
        '46': 'webm',

        # 3d videos
        '82': 'mp4',
        '83': 'mp4',
        '84': 'mp4',
        '85': 'mp4',
        '100': 'webm',
        '101': 'webm',
        '102': 'webm',

        # Apple HTTP Live Streaming
        '92': 'mp4',
        '93': 'mp4',
        '94': 'mp4',
        '95': 'mp4',
        '96': 'mp4',
        '132': 'mp4',
        '151': 'mp4',

        # Dash mp4
        '133': 'mp4',
        '134': 'mp4',
        '135': 'mp4',
        '136': 'mp4',
        '137': 'mp4',
        '138': 'mp4',
        '139': 'mp4',
        '140': 'mp4',
        '141': 'mp4',
        '160': 'mp4',

        # Dash webm
        '171': 'webm',
        '172': 'webm',
        '242': 'webm',
        '243': 'webm',
        '244': 'webm',
        '245': 'webm',
        '246': 'webm',
        '247': 'webm',
        '248': 'webm',
    }
    # itag -> resolution (appears to be "height x width") or a
    # quality/bitrate label for HLS/DASH streams.
    _video_dimensions = {
        '5': '240x400',
        '6': '???',
        '13': '???',
        '17': '144x176',
        '18': '360x640',
        '22': '720x1280',
        '34': '360x640',
        '35': '480x854',
        '36': '240x320',
        '37': '1080x1920',
        '38': '3072x4096',
        '43': '360x640',
        '44': '480x854',
        '45': '720x1280',
        '46': '1080x1920',
        '82': '360p',
        '83': '480p',
        '84': '720p',
        '85': '1080p',
        '92': '240p',
        '93': '360p',
        '94': '480p',
        '95': '720p',
        '96': '1080p',
        '100': '360p',
        '101': '480p',
        '102': '720p',
        '132': '240p',
        '151': '72p',
        '133': '240p',
        '134': '360p',
        '135': '480p',
        '136': '720p',
        '137': '1080p',
        '138': '>1080p',
        '139': '48k',
        '140': '128k',
        '141': '256k',
        '160': '192p',
        '171': '128k',
        '172': '256k',
        '242': '240p',
        '243': '360p',
        '244': '480p',
        '245': '480p',
        '246': '480p',
        '247': '720p',
        '248': '1080p',
    }
    # itag -> annotation for non-standard streams (3D / DASH-only).
    _special_itags = {
        '82': '3D',
        '83': '3D',
        '84': '3D',
        '85': '3D',
        '100': '3D',
        '101': '3D',
        '102': '3D',
        '133': 'DASH Video',
        '134': 'DASH Video',
        '135': 'DASH Video',
        '136': 'DASH Video',
        '137': 'DASH Video',
        '138': 'DASH Video',
        '139': 'DASH Audio',
        '140': 'DASH Audio',
        '141': 'DASH Audio',
        '160': 'DASH Video',
        '171': 'DASH Audio',
        '172': 'DASH Audio',
        '242': 'DASH Video',
        '243': 'DASH Video',
        '244': 'DASH Video',
        '245': 'DASH Video',
        '246': 'DASH Video',
        '247': 'DASH Video',
        '248': 'DASH Video',
    }

    IE_NAME = u'youtube'
    # Self-test fixtures run by the test suite; keys mirror info_dict fields.
    _TESTS = [
        {
            u"url":  u"http://www.youtube.com/watch?v=BaW_jenozKc",
            u"file":  u"BaW_jenozKc.mp4",
            u"info_dict": {
                u"title": u"youtube-dl test video \"'/\\ä↭𝕐",
                u"uploader": u"Philipp Hagemeister",
                u"uploader_id": u"phihag",
                u"upload_date": u"20121002",
                u"description": u"test chars:  \"'/\\ä↭𝕐\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de ."
            }
        },
        {
            u"url":  u"http://www.youtube.com/watch?v=1ltcDfZMA3U",
            u"file":  u"1ltcDfZMA3U.flv",
            u"note": u"Test VEVO video (#897)",
            u"info_dict": {
                u"upload_date": u"20070518",
                u"title": u"Maps - It Will Find You",
                u"description": u"Music video by Maps performing It Will Find You.",
                u"uploader": u"MuteUSA",
                u"uploader_id": u"MuteUSA"
            }
        },
        {
            u"url":  u"http://www.youtube.com/watch?v=UxxajLWwzqY",
            u"file":  u"UxxajLWwzqY.mp4",
            u"note": u"Test generic use_cipher_signature video (#897)",
            u"info_dict": {
                u"upload_date": u"20120506",
                u"title": u"Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]",
                u"description": u"md5:5b292926389560516e384ac437c0ec07",
                u"uploader": u"Icona Pop",
                u"uploader_id": u"IconaPop"
            }
        },
        {
            u"url":  u"https://www.youtube.com/watch?v=07FYdnEawAQ",
            u"file":  u"07FYdnEawAQ.mp4",
            u"note": u"Test VEVO video with age protection (#956)",
            u"info_dict": {
                u"upload_date": u"20130703",
                u"title": u"Justin Timberlake - Tunnel Vision (Explicit)",
                u"description": u"md5:64249768eec3bc4276236606ea996373",
                u"uploader": u"justintimberlakeVEVO",
                u"uploader_id": u"justintimberlakeVEVO"
            }
        },
    ]
382
383
384     @classmethod
385     def suitable(cls, url):
386         """Receives a URL and returns True if suitable for this IE."""
387         if YoutubePlaylistIE.suitable(url): return False
388         return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
389
390     def __init__(self, *args, **kwargs):
391         super(YoutubeIE, self).__init__(*args, **kwargs)
392         self._player_cache = {}
393
394     def report_video_webpage_download(self, video_id):
395         """Report attempt to download video webpage."""
396         self.to_screen(u'%s: Downloading video webpage' % video_id)
397
398     def report_video_info_webpage_download(self, video_id):
399         """Report attempt to download video info webpage."""
400         self.to_screen(u'%s: Downloading video info webpage' % video_id)
401
402     def report_information_extraction(self, video_id):
403         """Report attempt to extract video information."""
404         self.to_screen(u'%s: Extracting video information' % video_id)
405
406     def report_unavailable_format(self, video_id, format):
407         """Report extracted video URL."""
408         self.to_screen(u'%s: Format %s not available' % (video_id, format))
409
410     def report_rtmp_download(self):
411         """Indicate the download will use the RTMP protocol."""
412         self.to_screen(u'RTMP download detected')
413
    def _extract_signature_function(self, video_id, player_url, slen):
        """Build (and cache on disk) the signature-decryption function.

        video_id: video id, used only for progress messages.
        player_url: URL of the player file; must end in -<id>.js or -<id>.swf.
        slen: length of the scrambled signature; cache entries are keyed
            per (player type, player id, slen).

        Returns a callable mapping the scrambled signature string to the
        deciphered one.
        """
        # The player file name encodes both its type (js/swf) and version id.
        id_m = re.match(r'.*-(?P<id>[a-zA-Z0-9_-]+)\.(?P<ext>[a-z]+)$',
                        player_url)
        player_type = id_m.group('ext')
        player_id = id_m.group('id')

        # Read from filesystem cache
        func_id = '%s_%s_%d' % (player_type, player_id, slen)
        assert os.path.basename(func_id) == func_id
        cache_dir = self._downloader.params.get('cachedir',
                                                u'~/.youtube-dl/cache')

        # An explicit cachedir of None disables on-disk caching entirely.
        cache_enabled = cache_dir is not None
        if cache_enabled:
            cache_fn = os.path.join(os.path.expanduser(cache_dir),
                                    u'youtube-sigfuncs',
                                    func_id + '.json')
            try:
                with io.open(cache_fn, 'r', encoding='utf-8') as cachef:
                    cache_spec = json.load(cachef)
                # The cached spec lists, for each output position, the input
                # index the character comes from.
                return lambda s: u''.join(s[i] for i in cache_spec)
            except IOError:
                pass  # No cache available

        if player_type == 'js':
            code = self._download_webpage(
                player_url, video_id,
                note=u'Downloading %s player %s' % (player_type, player_id),
                errnote=u'Download of %s failed' % player_url)
            res = self._parse_sig_js(code)
        elif player_type == 'swf':
            urlh = self._request_webpage(
                player_url, video_id,
                note=u'Downloading %s player %s' % (player_type, player_id),
                errnote=u'Download of %s failed' % player_url)
            code = urlh.read()
            res = self._parse_sig_swf(code)
        else:
            assert False, 'Invalid player type %r' % player_type

        if cache_enabled:
            try:
                # Probe the extracted function with a known string to record
                # the permutation it performs, then persist that spec.
                test_string = u''.join(map(compat_chr, range(slen)))
                cache_res = res(test_string)
                cache_spec = [ord(c) for c in cache_res]
                try:
                    os.makedirs(os.path.dirname(cache_fn))
                except OSError as ose:
                    if ose.errno != errno.EEXIST:
                        raise
                write_json_file(cache_spec, cache_fn)
            except Exception:
                # Caching is best-effort; never fail extraction because of it.
                tb = traceback.format_exc()
                self._downloader.report_warning(
                    u'Writing cache to %r failed: %s' % (cache_fn, tb))

        return res
471
472     def _print_sig_code(self, func, slen):
473         def gen_sig_code(idxs):
474             def _genslice(start, end, step):
475                 starts = u'' if start == 0 else str(start)
476                 ends = (u':%d' % (end+step)) if end + step >= 0 else u':'
477                 steps = u'' if step == 1 else (u':%d' % step)
478                 return u's[%s%s%s]' % (starts, ends, steps)
479
480             step = None
481             start = '(Never used)'  # Quelch pyflakes warnings - start will be
482                                     # set as soon as step is set
483             for i, prev in zip(idxs[1:], idxs[:-1]):
484                 if step is not None:
485                     if i - prev == step:
486                         continue
487                     yield _genslice(start, prev, step)
488                     step = None
489                     continue
490                 if i - prev in [-1, 1]:
491                     step = i - prev
492                     start = prev
493                     continue
494                 else:
495                     yield u's[%d]' % prev
496             if step is None:
497                 yield u's[%d]' % i
498             else:
499                 yield _genslice(start, i, step)
500
501         test_string = u''.join(map(compat_chr, range(slen)))
502         cache_res = func(test_string)
503         cache_spec = [ord(c) for c in cache_res]
504         expr_code = u' + '.join(gen_sig_code(cache_spec))
505         code = u'if len(s) == %d:\n    return %s\n' % (slen, expr_code)
506         self.to_screen(u'Extracted signature function:\n' + code)
507
    def _parse_sig_js(self, jscode):
        """Extract the signature-decryption function from JS player code.

        Finds the function assigned via ``signature=`` and interprets the
        tiny JavaScript subset it uses (var/assignment/return statements,
        string/array member access, slice, modulo, calls to sibling
        functions) directly in Python.

        Returns a callable: scrambled signature string -> deciphered string.
        """
        funcname = self._search_regex(
            r'signature=([a-zA-Z]+)', jscode,
            u'Initial JS player signature function name')

        # Memoized helper functions from the player code, keyed by name.
        functions = {}

        def argidx(varname):
            # Map a single-letter JS variable name to a positional index.
            # NOTE(review): string.lowercase exists only on Python 2 (it is
            # string.ascii_lowercase on Python 3); also appears unused here.
            return string.lowercase.index(varname)

        def interpret_statement(stmt, local_vars, allow_recursion=20):
            # Execute one JS statement and return the value of its expression.
            if allow_recursion < 0:
                raise ExtractorError(u'Recursion limit reached')

            if stmt.startswith(u'var '):
                stmt = stmt[len(u'var '):]
            ass_m = re.match(r'^(?P<out>[a-z]+)(?:\[(?P<index>[^\]]+)\])?' +
                             r'=(?P<expr>.*)$', stmt)
            if ass_m:
                if ass_m.groupdict().get('index'):
                    # Indexed assignment: out[index] = expr
                    def assign(val):
                        lvar = local_vars[ass_m.group('out')]
                        idx = interpret_expression(ass_m.group('index'),
                                                   local_vars, allow_recursion)
                        assert isinstance(idx, int)
                        lvar[idx] = val
                        return val
                    expr = ass_m.group('expr')
                else:
                    # Plain assignment: out = expr
                    def assign(val):
                        local_vars[ass_m.group('out')] = val
                        return val
                    expr = ass_m.group('expr')
            elif stmt.startswith(u'return '):
                assign = lambda v: v
                expr = stmt[len(u'return '):]
            else:
                raise ExtractorError(
                    u'Cannot determine left side of statement in %r' % stmt)

            v = interpret_expression(expr, local_vars, allow_recursion)
            return assign(v)

        def interpret_expression(expr, local_vars, allow_recursion):
            # Evaluate the supported JS expression forms, most specific first.
            if expr.isdigit():
                return int(expr)

            if expr.isalpha():
                return local_vars[expr]

            m = re.match(r'^(?P<in>[a-z]+)\.(?P<member>.*)$', expr)
            if m:
                # Member access / method call on a local variable.
                member = m.group('member')
                val = local_vars[m.group('in')]
                if member == 'split("")':
                    return list(val)
                if member == 'join("")':
                    return u''.join(val)
                if member == 'length':
                    return len(val)
                if member == 'reverse()':
                    return val[::-1]
                slice_m = re.match(r'slice\((?P<idx>.*)\)', member)
                if slice_m:
                    idx = interpret_expression(
                        slice_m.group('idx'), local_vars, allow_recursion-1)
                    return val[idx:]

            m = re.match(
                r'^(?P<in>[a-z]+)\[(?P<idx>.+)\]$', expr)
            if m:
                # Indexing: in[idx]
                val = local_vars[m.group('in')]
                idx = interpret_expression(m.group('idx'), local_vars,
                                           allow_recursion-1)
                return val[idx]

            m = re.match(r'^(?P<a>.+?)(?P<op>[%])(?P<b>.+?)$', expr)
            if m:
                # Binary modulo: a % b
                a = interpret_expression(m.group('a'),
                                         local_vars, allow_recursion)
                b = interpret_expression(m.group('b'),
                                         local_vars, allow_recursion)
                return a % b

            m = re.match(
                r'^(?P<func>[a-zA-Z]+)\((?P<args>[a-z0-9,]+)\)$', expr)
            if m:
                # Call to another function from the player code; extracted
                # lazily and memoized in `functions`.
                fname = m.group('func')
                if fname not in functions:
                    functions[fname] = extract_function(fname)
                argvals = [int(v) if v.isdigit() else local_vars[v]
                           for v in m.group('args').split(',')]
                return functions[fname](argvals)
            raise ExtractorError(u'Unsupported JS expression %r' % expr)

        def extract_function(funcname):
            # Locate the named function's source and wrap it as a Python
            # callable taking a list of argument values.
            func_m = re.search(
                r'function ' + re.escape(funcname) +
                r'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}',
                jscode)
            argnames = func_m.group('args').split(',')

            def resf(args):
                local_vars = dict(zip(argnames, args))
                # The value of the last statement (the `return`) is returned.
                for stmt in func_m.group('code').split(';'):
                    res = interpret_statement(stmt, local_vars)
                return res
            return resf

        initial_function = extract_function(funcname)
        return lambda s: initial_function([s])
619
620     def _parse_sig_swf(self, file_contents):
621         if file_contents[1:3] != b'WS':
622             raise ExtractorError(
623                 u'Not an SWF file; header is %r' % file_contents[:3])
624         if file_contents[:1] == b'C':
625             content = zlib.decompress(file_contents[8:])
626         else:
627             raise NotImplementedError(u'Unsupported compression format %r' %
628                                       file_contents[:1])
629
630         def extract_tags(content):
631             pos = 0
632             while pos < len(content):
633                 header16 = struct.unpack('<H', content[pos:pos+2])[0]
634                 pos += 2
635                 tag_code = header16 >> 6
636                 tag_len = header16 & 0x3f
637                 if tag_len == 0x3f:
638                     tag_len = struct.unpack('<I', content[pos:pos+4])[0]
639                     pos += 4
640                 assert pos+tag_len <= len(content)
641                 yield (tag_code, content[pos:pos+tag_len])
642                 pos += tag_len
643
644         code_tag = next(tag
645                         for tag_code, tag in extract_tags(content)
646                         if tag_code == 82)
647         p = code_tag.index(b'\0', 4) + 1
648         code_reader = io.BytesIO(code_tag[p:])
649
650         # Parse ABC (AVM2 ByteCode)
651         def read_int(reader=None):
652             if reader is None:
653                 reader = code_reader
654             res = 0
655             shift = 0
656             for _ in range(5):
657                 buf = reader.read(1)
658                 assert len(buf) == 1
659                 b = struct.unpack('<B', buf)[0]
660                 res = res | ((b & 0x7f) << shift)
661                 if b & 0x80 == 0:
662                     break
663                 shift += 7
664             return res
665
666         def u30(reader=None):
667             res = read_int(reader)
668             assert res & 0xf0000000 == 0
669             return res
670         u32 = read_int
671
672         def s32(reader=None):
673             v = read_int(reader)
674             if v & 0x80000000 != 0:
675                 v = - ((v ^ 0xffffffff) + 1)
676             return v
677
678         def read_string(reader=None):
679             if reader is None:
680                 reader = code_reader
681             slen = u30(reader)
682             resb = reader.read(slen)
683             assert len(resb) == slen
684             return resb.decode('utf-8')
685
686         def read_bytes(count, reader=None):
687             if reader is None:
688                 reader = code_reader
689             resb = reader.read(count)
690             assert len(resb) == count
691             return resb
692
693         def read_byte(reader=None):
694             resb = read_bytes(1, reader=reader)
695             res = struct.unpack('<B', resb)[0]
696             return res
697
698         # minor_version + major_version
699         read_bytes(2 + 2)
700
701         # Constant pool
702         int_count = u30()
703         for _c in range(1, int_count):
704             s32()
705         uint_count = u30()
706         for _c in range(1, uint_count):
707             u32()
708         double_count = u30()
709         read_bytes((double_count-1) * 8)
710         string_count = u30()
711         constant_strings = [u'']
712         for _c in range(1, string_count):
713             s = read_string()
714             constant_strings.append(s)
715         namespace_count = u30()
716         for _c in range(1, namespace_count):
717             read_bytes(1)  # kind
718             u30()  # name
719         ns_set_count = u30()
720         for _c in range(1, ns_set_count):
721             count = u30()
722             for _c2 in range(count):
723                 u30()
724         multiname_count = u30()
725         MULTINAME_SIZES = {
726             0x07: 2,  # QName
727             0x0d: 2,  # QNameA
728             0x0f: 1,  # RTQName
729             0x10: 1,  # RTQNameA
730             0x11: 0,  # RTQNameL
731             0x12: 0,  # RTQNameLA
732             0x09: 2,  # Multiname
733             0x0e: 2,  # MultinameA
734             0x1b: 1,  # MultinameL
735             0x1c: 1,  # MultinameLA
736         }
737         multinames = [u'']
738         for _c in range(1, multiname_count):
739             kind = u30()
740             assert kind in MULTINAME_SIZES, u'Invalid multiname kind %r' % kind
741             if kind == 0x07:
742                 u30()  # namespace_idx
743                 name_idx = u30()
744                 multinames.append(constant_strings[name_idx])
745             else:
746                 multinames.append('[MULTINAME kind: %d]' % kind)
747                 for _c2 in range(MULTINAME_SIZES[kind]):
748                     u30()
749
750         # Methods
751         method_count = u30()
752         MethodInfo = collections.namedtuple(
753             'MethodInfo',
754             ['NEED_ARGUMENTS', 'NEED_REST'])
755         method_infos = []
756         for method_id in range(method_count):
757             param_count = u30()
758             u30()  # return type
759             for _ in range(param_count):
760                 u30()  # param type
761             u30()  # name index (always 0 for youtube)
762             flags = read_byte()
763             if flags & 0x08 != 0:
764                 # Options present
765                 option_count = u30()
766                 for c in range(option_count):
767                     u30()  # val
768                     read_bytes(1)  # kind
769             if flags & 0x80 != 0:
770                 # Param names present
771                 for _ in range(param_count):
772                     u30()  # param name
773             mi = MethodInfo(flags & 0x01 != 0, flags & 0x04 != 0)
774             method_infos.append(mi)
775
776         # Metadata
777         metadata_count = u30()
778         for _c in range(metadata_count):
779             u30()  # name
780             item_count = u30()
781             for _c2 in range(item_count):
782                 u30()  # key
783                 u30()  # value
784
785         def parse_traits_info():
786             trait_name_idx = u30()
787             kind_full = read_byte()
788             kind = kind_full & 0x0f
789             attrs = kind_full >> 4
790             methods = {}
791             if kind in [0x00, 0x06]:  # Slot or Const
792                 u30()  # Slot id
793                 u30()  # type_name_idx
794                 vindex = u30()
795                 if vindex != 0:
796                     read_byte()  # vkind
797             elif kind in [0x01, 0x02, 0x03]:  # Method / Getter / Setter
798                 u30()  # disp_id
799                 method_idx = u30()
800                 methods[multinames[trait_name_idx]] = method_idx
801             elif kind == 0x04:  # Class
802                 u30()  # slot_id
803                 u30()  # classi
804             elif kind == 0x05:  # Function
805                 u30()  # slot_id
806                 function_idx = u30()
807                 methods[function_idx] = multinames[trait_name_idx]
808             else:
809                 raise ExtractorError(u'Unsupported trait kind %d' % kind)
810
811             if attrs & 0x4 != 0:  # Metadata present
812                 metadata_count = u30()
813                 for _c3 in range(metadata_count):
814                     u30()  # metadata index
815
816             return methods
817
818         # Classes
819         TARGET_CLASSNAME = u'SignatureDecipher'
820         searched_idx = multinames.index(TARGET_CLASSNAME)
821         searched_class_id = None
822         class_count = u30()
823         for class_id in range(class_count):
824             name_idx = u30()
825             if name_idx == searched_idx:
826                 # We found the class we're looking for!
827                 searched_class_id = class_id
828             u30()  # super_name idx
829             flags = read_byte()
830             if flags & 0x08 != 0:  # Protected namespace is present
831                 u30()  # protected_ns_idx
832             intrf_count = u30()
833             for _c2 in range(intrf_count):
834                 u30()
835             u30()  # iinit
836             trait_count = u30()
837             for _c2 in range(trait_count):
838                 parse_traits_info()
839
840         if searched_class_id is None:
841             raise ExtractorError(u'Target class %r not found' %
842                                  TARGET_CLASSNAME)
843
844         method_names = {}
845         method_idxs = {}
846         for class_id in range(class_count):
847             u30()  # cinit
848             trait_count = u30()
849             for _c2 in range(trait_count):
850                 trait_methods = parse_traits_info()
851                 if class_id == searched_class_id:
852                     method_names.update(trait_methods.items())
853                     method_idxs.update(dict(
854                         (idx, name)
855                         for name, idx in trait_methods.items()))
856
857         # Scripts
858         script_count = u30()
859         for _c in range(script_count):
860             u30()  # init
861             trait_count = u30()
862             for _c2 in range(trait_count):
863                 parse_traits_info()
864
865         # Method bodies
866         method_body_count = u30()
867         Method = collections.namedtuple('Method', ['code', 'local_count'])
868         methods = {}
869         for _c in range(method_body_count):
870             method_idx = u30()
871             u30()  # max_stack
872             local_count = u30()
873             u30()  # init_scope_depth
874             u30()  # max_scope_depth
875             code_length = u30()
876             code = read_bytes(code_length)
877             if method_idx in method_idxs:
878                 m = Method(code, local_count)
879                 methods[method_idxs[method_idx]] = m
880             exception_count = u30()
881             for _c2 in range(exception_count):
882                 u30()  # from
883                 u30()  # to
884                 u30()  # target
885                 u30()  # exc_type
886                 u30()  # var_name
887             trait_count = u30()
888             for _c2 in range(trait_count):
889                 parse_traits_info()
890
891         assert p + code_reader.tell() == len(code_tag)
892         assert len(methods) == len(method_idxs)
893
894         method_pyfunctions = {}
895
896         def extract_function(func_name):
897             if func_name in method_pyfunctions:
898                 return method_pyfunctions[func_name]
899             if func_name not in methods:
900                 raise ExtractorError(u'Cannot find function %r' % func_name)
901             m = methods[func_name]
902
903             def resfunc(args):
904                 registers = ['(this)'] + list(args) + [None] * m.local_count
905                 stack = []
906                 coder = io.BytesIO(m.code)
907                 while True:
908                     opcode = struct.unpack('!B', coder.read(1))[0]
909                     if opcode == 36:  # pushbyte
910                         v = struct.unpack('!B', coder.read(1))[0]
911                         stack.append(v)
912                     elif opcode == 44:  # pushstring
913                         idx = u30(coder)
914                         stack.append(constant_strings[idx])
915                     elif opcode == 48:  # pushscope
916                         # We don't implement the scope register, so we'll just
917                         # ignore the popped value
918                         stack.pop()
919                     elif opcode == 70:  # callproperty
920                         index = u30(coder)
921                         mname = multinames[index]
922                         arg_count = u30(coder)
923                         args = list(reversed(
924                             [stack.pop() for _ in range(arg_count)]))
925                         obj = stack.pop()
926                         if mname == u'split':
927                             assert len(args) == 1
928                             assert isinstance(args[0], compat_str)
929                             assert isinstance(obj, compat_str)
930                             if args[0] == u'':
931                                 res = list(obj)
932                             else:
933                                 res = obj.split(args[0])
934                             stack.append(res)
935                         elif mname == u'slice':
936                             assert len(args) == 1
937                             assert isinstance(args[0], int)
938                             assert isinstance(obj, list)
939                             res = obj[args[0]:]
940                             stack.append(res)
941                         elif mname == u'join':
942                             assert len(args) == 1
943                             assert isinstance(args[0], compat_str)
944                             assert isinstance(obj, list)
945                             res = args[0].join(obj)
946                             stack.append(res)
947                         elif mname in method_pyfunctions:
948                             stack.append(method_pyfunctions[mname](args))
949                         else:
950                             raise NotImplementedError(
951                                 u'Unsupported property %r on %r'
952                                 % (mname, obj))
953                     elif opcode == 72:  # returnvalue
954                         res = stack.pop()
955                         return res
956                     elif opcode == 79:  # callpropvoid
957                         index = u30(coder)
958                         mname = multinames[index]
959                         arg_count = u30(coder)
960                         args = list(reversed(
961                             [stack.pop() for _ in range(arg_count)]))
962                         obj = stack.pop()
963                         if mname == u'reverse':
964                             assert isinstance(obj, list)
965                             obj.reverse()
966                         else:
967                             raise NotImplementedError(
968                                 u'Unsupported (void) property %r on %r'
969                                 % (mname, obj))
970                     elif opcode == 93:  # findpropstrict
971                         index = u30(coder)
972                         mname = multinames[index]
973                         res = extract_function(mname)
974                         stack.append(res)
975                     elif opcode == 97:  # setproperty
976                         index = u30(coder)
977                         value = stack.pop()
978                         idx = stack.pop()
979                         obj = stack.pop()
980                         assert isinstance(obj, list)
981                         assert isinstance(idx, int)
982                         obj[idx] = value
983                     elif opcode == 98:  # getlocal
984                         index = u30(coder)
985                         stack.append(registers[index])
986                     elif opcode == 99:  # setlocal
987                         index = u30(coder)
988                         value = stack.pop()
989                         registers[index] = value
990                     elif opcode == 102:  # getproperty
991                         index = u30(coder)
992                         pname = multinames[index]
993                         if pname == u'length':
994                             obj = stack.pop()
995                             assert isinstance(obj, list)
996                             stack.append(len(obj))
997                         else:  # Assume attribute access
998                             idx = stack.pop()
999                             assert isinstance(idx, int)
1000                             obj = stack.pop()
1001                             assert isinstance(obj, list)
1002                             stack.append(obj[idx])
1003                     elif opcode == 128:  # coerce
1004                         u30(coder)
1005                     elif opcode == 133:  # coerce_s
1006                         assert isinstance(stack[-1], (type(None), compat_str))
1007                     elif opcode == 164:  # modulo
1008                         value2 = stack.pop()
1009                         value1 = stack.pop()
1010                         res = value1 % value2
1011                         stack.append(res)
1012                     elif opcode == 208:  # getlocal_0
1013                         stack.append(registers[0])
1014                     elif opcode == 209:  # getlocal_1
1015                         stack.append(registers[1])
1016                     elif opcode == 210:  # getlocal_2
1017                         stack.append(registers[2])
1018                     elif opcode == 211:  # getlocal_3
1019                         stack.append(registers[3])
1020                     elif opcode == 214:  # setlocal_2
1021                         registers[2] = stack.pop()
1022                     elif opcode == 215:  # setlocal_3
1023                         registers[3] = stack.pop()
1024                     else:
1025                         raise NotImplementedError(
1026                             u'Unsupported opcode %d' % opcode)
1027
1028             method_pyfunctions[func_name] = resfunc
1029             return resfunc
1030
1031         initial_function = extract_function(u'decipher')
1032         return lambda s: initial_function([s])
1033
1034     def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
1035         """Turn the encrypted s field into a working signature"""
1036
1037         if player_url is not None:
1038             try:
1039                 if player_url not in self._player_cache:
1040                     func = self._extract_signature_function(
1041                         video_id, player_url, len(s)
1042                     )
1043                     self._player_cache[player_url] = func
1044                 func = self._player_cache[player_url]
1045                 if self._downloader.params.get('youtube_print_sig_code'):
1046                     self._print_sig_code(func, len(s))
1047                 return func(s)
1048             except Exception:
1049                 tb = traceback.format_exc()
1050                 self._downloader.report_warning(
1051                     u'Automatic signature extraction failed: ' + tb)
1052
1053             self._downloader.report_warning(
1054                 u'Warning: Falling back to static signature algorithm')
1055
1056         return self._static_decrypt_signature(
1057             s, video_id, player_url, age_gate)
1058
1059     def _static_decrypt_signature(self, s, video_id, player_url, age_gate):
1060         if age_gate:
1061             # The videos with age protection use another player, so the
1062             # algorithms can be different.
1063             if len(s) == 86:
1064                 return s[2:63] + s[82] + s[64:82] + s[63]
1065
1066         if len(s) == 93:
1067             return s[86:29:-1] + s[88] + s[28:5:-1]
1068         elif len(s) == 92:
1069             return s[25] + s[3:25] + s[0] + s[26:42] + s[79] + s[43:79] + s[91] + s[80:83]
1070         elif len(s) == 91:
1071             return s[84:27:-1] + s[86] + s[26:5:-1]
1072         elif len(s) == 90:
1073             return s[25] + s[3:25] + s[2] + s[26:40] + s[77] + s[41:77] + s[89] + s[78:81]
1074         elif len(s) == 89:
1075             return s[84:78:-1] + s[87] + s[77:60:-1] + s[0] + s[59:3:-1]
1076         elif len(s) == 88:
1077             return s[7:28] + s[87] + s[29:45] + s[55] + s[46:55] + s[2] + s[56:87] + s[28]
1078         elif len(s) == 87:
1079             return s[6:27] + s[4] + s[28:39] + s[27] + s[40:59] + s[2] + s[60:]
1080         elif len(s) == 86:
1081             return s[80:72:-1] + s[16] + s[71:39:-1] + s[72] + s[38:16:-1] + s[82] + s[15::-1]
1082         elif len(s) == 85:
1083             return s[3:11] + s[0] + s[12:55] + s[84] + s[56:84]
1084         elif len(s) == 84:
1085             return s[78:70:-1] + s[14] + s[69:37:-1] + s[70] + s[36:14:-1] + s[80] + s[:14][::-1]
1086         elif len(s) == 83:
1087             return s[80:63:-1] + s[0] + s[62:0:-1] + s[63]
1088         elif len(s) == 82:
1089             return s[80:73:-1] + s[81] + s[72:54:-1] + s[2] + s[53:43:-1] + s[0] + s[42:2:-1] + s[43] + s[1] + s[54]
1090         elif len(s) == 81:
1091             return s[56] + s[79:56:-1] + s[41] + s[55:41:-1] + s[80] + s[40:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
1092         elif len(s) == 80:
1093             return s[1:19] + s[0] + s[20:68] + s[19] + s[69:80]
1094         elif len(s) == 79:
1095             return s[54] + s[77:54:-1] + s[39] + s[53:39:-1] + s[78] + s[38:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
1096
1097         else:
1098             raise ExtractorError(u'Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s)))
1099
1100     def _get_available_subtitles(self, video_id):
1101         try:
1102             sub_list = self._download_webpage(
1103                 'http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
1104                 video_id, note=False)
1105         except ExtractorError as err:
1106             self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err))
1107             return {}
1108         lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list)
1109
1110         sub_lang_list = {}
1111         for l in lang_list:
1112             lang = l[1]
1113             params = compat_urllib_parse.urlencode({
1114                 'lang': lang,
1115                 'v': video_id,
1116                 'fmt': self._downloader.params.get('subtitlesformat'),
1117             })
1118             url = u'http://www.youtube.com/api/timedtext?' + params
1119             sub_lang_list[lang] = url
1120         if not sub_lang_list:
1121             self._downloader.report_warning(u'video doesn\'t have subtitles')
1122             return {}
1123         return sub_lang_list
1124
1125     def _get_available_automatic_caption(self, video_id, webpage):
1126         """We need the webpage for getting the captions url, pass it as an
1127            argument to speed up the process."""
1128         sub_format = self._downloader.params.get('subtitlesformat')
1129         self.to_screen(u'%s: Looking for automatic captions' % video_id)
1130         mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
1131         err_msg = u'Couldn\'t find automatic captions for %s' % video_id
1132         if mobj is None:
1133             self._downloader.report_warning(err_msg)
1134             return {}
1135         player_config = json.loads(mobj.group(1))
1136         try:
1137             args = player_config[u'args']
1138             caption_url = args[u'ttsurl']
1139             timestamp = args[u'timestamp']
1140             # We get the available subtitles
1141             list_params = compat_urllib_parse.urlencode({
1142                 'type': 'list',
1143                 'tlangs': 1,
1144                 'asrs': 1,
1145             })
1146             list_url = caption_url + '&' + list_params
1147             list_page = self._download_webpage(list_url, video_id)
1148             caption_list = xml.etree.ElementTree.fromstring(list_page.encode('utf-8'))
1149             original_lang_node = caption_list.find('track')
1150             if original_lang_node.attrib.get('kind') != 'asr' :
1151                 self._downloader.report_warning(u'Video doesn\'t have automatic captions')
1152                 return {}
1153             original_lang = original_lang_node.attrib['lang_code']
1154
1155             sub_lang_list = {}
1156             for lang_node in caption_list.findall('target'):
1157                 sub_lang = lang_node.attrib['lang_code']
1158                 params = compat_urllib_parse.urlencode({
1159                     'lang': original_lang,
1160                     'tlang': sub_lang,
1161                     'fmt': sub_format,
1162                     'ts': timestamp,
1163                     'kind': 'asr',
1164                 })
1165                 sub_lang_list[sub_lang] = caption_url + '&' + params
1166             return sub_lang_list
1167         # An extractor error can be raise by the download process if there are
1168         # no automatic captions but there are subtitles
1169         except (KeyError, ExtractorError):
1170             self._downloader.report_warning(err_msg)
1171             return {}
1172
1173     def _print_formats(self, formats):
1174         print('Available formats:')
1175         for x in formats:
1176             print('%s\t:\t%s\t[%s]%s' %(x, self._video_extensions.get(x, 'flv'),
1177                                         self._video_dimensions.get(x, '???'),
1178                                         ' ('+self._special_itags[x]+')' if x in self._special_itags else ''))
1179
1180     def _extract_id(self, url):
1181         mobj = re.match(self._VALID_URL, url, re.VERBOSE)
1182         if mobj is None:
1183             raise ExtractorError(u'Invalid URL: %s' % url)
1184         video_id = mobj.group(2)
1185         return video_id
1186
    def _get_video_url_list(self, url_map):
        """
        Transform a dictionary in the format {itag:url} to a list of (itag, url)
        with the requested formats.

        Returns None after printing the format table when the user asked for
        --list-formats; raises ExtractorError when no known/requested format
        is available.
        """
        req_format = self._downloader.params.get('format', None)
        format_limit = self._downloader.params.get('format_limit', None)
        # Itags ordered best-to-worst; the "prefer free formats" ordering is
        # used when the corresponding option is set.
        available_formats = self._available_formats_prefer_free if self._downloader.params.get('prefer_free_formats', False) else self._available_formats
        if format_limit is not None and format_limit in available_formats:
            # Cap quality: drop everything ranked better than the limit.
            format_list = available_formats[available_formats.index(format_limit):]
        else:
            format_list = available_formats
        # Keep only the itags actually offered for this video, best first.
        existing_formats = [x for x in format_list if x in url_map]
        if len(existing_formats) == 0:
            raise ExtractorError(u'no known formats available for video')
        if self._downloader.params.get('listformats', None):
            self._print_formats(existing_formats)
            return
        if req_format is None or req_format == 'best':
            video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
        elif req_format == 'worst':
            video_url_list = [(existing_formats[-1], url_map[existing_formats[-1]])] # worst quality
        elif req_format in ('-1', 'all'):
            video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
        else:
            # Specific formats. We pick the first in a slash-delimeted sequence.
            # Format can be specified as itag or 'mp4' or 'flv' etc. We pick the highest quality
            # available in the specified format. For example,
            # if '1/2/3/4' is requested and '2' and '4' are available, we pick '2'.
            # if '1/mp4/3/4' is requested and '1' and '5' (is a mp4) are available, we pick '1'.
            # if '1/mp4/3/4' is requested and '4' and '5' (is a mp4) are available, we pick '5'.
            req_formats = req_format.split('/')
            video_url_list = None
            for rf in req_formats:
                if rf in url_map:
                    video_url_list = [(rf, url_map[rf])]
                    break
                if rf in self._video_formats_map:
                    # rf is a container name (e.g. 'mp4'): try its itags in
                    # quality order.
                    for srf in self._video_formats_map[rf]:
                        if srf in url_map:
                            video_url_list = [(srf, url_map[srf])]
                            break
                    else:
                        # No itag of this container is available; the for-else
                        # fires only when the inner loop did NOT break, so try
                        # the next requested format.
                        continue
                    # Inner loop broke: a format was found, stop searching.
                    break
            if video_url_list is None:
                raise ExtractorError(u'requested format not available')
        return video_url_list
1235
1236     def _extract_from_m3u8(self, manifest_url, video_id):
1237         url_map = {}
1238         def _get_urls(_manifest):
1239             lines = _manifest.split('\n')
1240             urls = filter(lambda l: l and not l.startswith('#'),
1241                             lines)
1242             return urls
1243         manifest = self._download_webpage(manifest_url, video_id, u'Downloading formats manifest')
1244         formats_urls = _get_urls(manifest)
1245         for format_url in formats_urls:
1246             itag = self._search_regex(r'itag/(\d+?)/', format_url, 'itag')
1247             url_map[itag] = format_url
1248         return url_map
1249
1250     def _real_extract(self, url):
1251         if re.match(r'(?:https?://)?[^/]+/watch\?feature=[a-z_]+$', url):
1252             self._downloader.report_warning(u'Did you forget to quote the URL? Remember that & is a meta-character in most shells, so you want to put the URL in quotes, like  youtube-dl \'http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc\' (or simply  youtube-dl BaW_jenozKc  ).')
1253
1254         # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1255         mobj = re.search(self._NEXT_URL_RE, url)
1256         if mobj:
1257             url = 'https://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/')
1258         video_id = self._extract_id(url)
1259
1260         # Get video webpage
1261         self.report_video_webpage_download(video_id)
1262         url = 'https://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
1263         request = compat_urllib_request.Request(url)
1264         try:
1265             video_webpage_bytes = compat_urllib_request.urlopen(request).read()
1266         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1267             raise ExtractorError(u'Unable to download video webpage: %s' % compat_str(err))
1268
1269         video_webpage = video_webpage_bytes.decode('utf-8', 'ignore')
1270
1271         # Attempt to extract SWF player URL
1272         mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
1273         if mobj is not None:
1274             player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1275         else:
1276             player_url = None
1277
1278         # Get video info
1279         self.report_video_info_webpage_download(video_id)
1280         if re.search(r'player-age-gate-content">', video_webpage) is not None:
1281             self.report_age_confirmation()
1282             age_gate = True
1283             # We simulate the access to the video from www.youtube.com/v/{video_id}
1284             # this can be viewed without login into Youtube
1285             data = compat_urllib_parse.urlencode({'video_id': video_id,
1286                                                   'el': 'embedded',
1287                                                   'gl': 'US',
1288                                                   'hl': 'en',
1289                                                   'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1290                                                   'asv': 3,
1291                                                   'sts':'1588',
1292                                                   })
1293             video_info_url = 'https://www.youtube.com/get_video_info?' + data
1294             video_info_webpage = self._download_webpage(video_info_url, video_id,
1295                                     note=False,
1296                                     errnote='unable to download video info webpage')
1297             video_info = compat_parse_qs(video_info_webpage)
1298         else:
1299             age_gate = False
1300             for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
1301                 video_info_url = ('https://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
1302                         % (video_id, el_type))
1303                 video_info_webpage = self._download_webpage(video_info_url, video_id,
1304                                         note=False,
1305                                         errnote='unable to download video info webpage')
1306                 video_info = compat_parse_qs(video_info_webpage)
1307                 if 'token' in video_info:
1308                     break
1309         if 'token' not in video_info:
1310             if 'reason' in video_info:
1311                 raise ExtractorError(u'YouTube said: %s' % video_info['reason'][0], expected=True)
1312             else:
1313                 raise ExtractorError(u'"token" parameter not in video info for unknown reason')
1314
1315         # Check for "rental" videos
1316         if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
1317             raise ExtractorError(u'"rental" videos not supported')
1318
1319         # Start extracting information
1320         self.report_information_extraction(video_id)
1321
1322         # uploader
1323         if 'author' not in video_info:
1324             raise ExtractorError(u'Unable to extract uploader name')
1325         video_uploader = compat_urllib_parse.unquote_plus(video_info['author'][0])
1326
1327         # uploader_id
1328         video_uploader_id = None
1329         mobj = re.search(r'<link itemprop="url" href="http://www.youtube.com/(?:user|channel)/([^"]+)">', video_webpage)
1330         if mobj is not None:
1331             video_uploader_id = mobj.group(1)
1332         else:
1333             self._downloader.report_warning(u'unable to extract uploader nickname')
1334
1335         # title
1336         if 'title' not in video_info:
1337             raise ExtractorError(u'Unable to extract video title')
1338         video_title = compat_urllib_parse.unquote_plus(video_info['title'][0])
1339
1340         # thumbnail image
1341         # We try first to get a high quality image:
1342         m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
1343                             video_webpage, re.DOTALL)
1344         if m_thumb is not None:
1345             video_thumbnail = m_thumb.group(1)
1346         elif 'thumbnail_url' not in video_info:
1347             self._downloader.report_warning(u'unable to extract video thumbnail')
1348             video_thumbnail = None
1349         else:   # don't panic if we can't find it
1350             video_thumbnail = compat_urllib_parse.unquote_plus(video_info['thumbnail_url'][0])
1351
1352         # upload date
1353         upload_date = None
1354         mobj = re.search(r'id="eow-date.*?>(.*?)</span>', video_webpage, re.DOTALL)
1355         if mobj is not None:
1356             upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
1357             upload_date = unified_strdate(upload_date)
1358
1359         # description
1360         video_description = get_element_by_id("eow-description", video_webpage)
1361         if video_description:
1362             video_description = clean_html(video_description)
1363         else:
1364             fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
1365             if fd_mobj:
1366                 video_description = unescapeHTML(fd_mobj.group(1))
1367             else:
1368                 video_description = u''
1369
1370         # subtitles
1371         video_subtitles = self.extract_subtitles(video_id, video_webpage)
1372
1373         if self._downloader.params.get('listsubtitles', False):
1374             self._list_available_subtitles(video_id, video_webpage)
1375             return
1376
1377         if 'length_seconds' not in video_info:
1378             self._downloader.report_warning(u'unable to extract video duration')
1379             video_duration = ''
1380         else:
1381             video_duration = compat_urllib_parse.unquote_plus(video_info['length_seconds'][0])
1382
1383         # Decide which formats to download
1384
1385         try:
1386             mobj = re.search(r';ytplayer.config = ({.*?});', video_webpage)
1387             if not mobj:
1388                 raise ValueError('Could not find vevo ID')
1389             info = json.loads(mobj.group(1))
1390             args = info['args']
1391             # Easy way to know if the 's' value is in url_encoded_fmt_stream_map
1392             # this signatures are encrypted
1393             m_s = re.search(r'[&,]s=', args['url_encoded_fmt_stream_map'])
1394             if m_s is not None:
1395                 self.to_screen(u'%s: Encrypted signatures detected.' % video_id)
1396                 video_info['url_encoded_fmt_stream_map'] = [args['url_encoded_fmt_stream_map']]
1397             m_s = re.search(r'[&,]s=', args.get('adaptive_fmts', u''))
1398             if m_s is not None:
1399                 if 'url_encoded_fmt_stream_map' in video_info:
1400                     video_info['url_encoded_fmt_stream_map'][0] += ',' + args['adaptive_fmts']
1401                 else:
1402                     video_info['url_encoded_fmt_stream_map'] = [args['adaptive_fmts']]
1403             elif 'adaptive_fmts' in video_info:
1404                 if 'url_encoded_fmt_stream_map' in video_info:
1405                     video_info['url_encoded_fmt_stream_map'][0] += ',' + video_info['adaptive_fmts'][0]
1406                 else:
1407                     video_info['url_encoded_fmt_stream_map'] = video_info['adaptive_fmts']
1408         except ValueError:
1409             pass
1410
1411         if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1412             self.report_rtmp_download()
1413             video_url_list = [(None, video_info['conn'][0])]
1414         elif 'url_encoded_fmt_stream_map' in video_info and len(video_info['url_encoded_fmt_stream_map']) >= 1:
1415             if 'rtmpe%3Dyes' in video_info['url_encoded_fmt_stream_map'][0]:
1416                 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
1417             url_map = {}
1418             for url_data_str in video_info['url_encoded_fmt_stream_map'][0].split(','):
1419                 url_data = compat_parse_qs(url_data_str)
1420                 if 'itag' in url_data and 'url' in url_data:
1421                     url = url_data['url'][0]
1422                     if 'sig' in url_data:
1423                         url += '&signature=' + url_data['sig'][0]
1424                     elif 's' in url_data:
1425                         encrypted_sig = url_data['s'][0]
1426                         if self._downloader.params.get('verbose'):
1427                             if age_gate:
1428                                 if player_url is None:
1429                                     player_version = 'unknown'
1430                                 else:
1431                                     player_version = self._search_regex(
1432                                         r'-(.+)\.swf$', player_url,
1433                                         u'flash player', fatal=False)
1434                                 player_desc = 'flash player %s' % player_version
1435                             else:
1436                                 player_version = self._search_regex(
1437                                     r'html5player-(.+?)\.js', video_webpage,
1438                                     'html5 player', fatal=False)
1439                                 player_desc = u'html5 player %s' % player_version
1440
1441                             parts_sizes = u'.'.join(compat_str(len(part)) for part in encrypted_sig.split('.'))
1442                             self.to_screen(u'encrypted signature length %d (%s), itag %s, %s' %
1443                                 (len(encrypted_sig), parts_sizes, url_data['itag'][0], player_desc))
1444
1445                         if not age_gate:
1446                             jsplayer_url_json = self._search_regex(
1447                                 r'"assets":.+?"js":\s*("[^"]+")',
1448                                 video_webpage, u'JS player URL')
1449                             player_url = json.loads(jsplayer_url_json)
1450
1451                         signature = self._decrypt_signature(
1452                             encrypted_sig, video_id, player_url, age_gate)
1453                         url += '&signature=' + signature
1454                     if 'ratebypass' not in url:
1455                         url += '&ratebypass=yes'
1456                     url_map[url_data['itag'][0]] = url
1457             video_url_list = self._get_video_url_list(url_map)
1458             if not video_url_list:
1459                 return
1460         elif video_info.get('hlsvp'):
1461             manifest_url = video_info['hlsvp'][0]
1462             url_map = self._extract_from_m3u8(manifest_url, video_id)
1463             video_url_list = self._get_video_url_list(url_map)
1464             if not video_url_list:
1465                 return
1466
1467         else:
1468             raise ExtractorError(u'no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
1469
1470         results = []
1471         for format_param, video_real_url in video_url_list:
1472             # Extension
1473             video_extension = self._video_extensions.get(format_param, 'flv')
1474
1475             video_format = '{0} - {1}{2}'.format(format_param if format_param else video_extension,
1476                                               self._video_dimensions.get(format_param, '???'),
1477                                               ' ('+self._special_itags[format_param]+')' if format_param in self._special_itags else '')
1478
1479             results.append({
1480                 'id':       video_id,
1481                 'url':      video_real_url,
1482                 'uploader': video_uploader,
1483                 'uploader_id': video_uploader_id,
1484                 'upload_date':  upload_date,
1485                 'title':    video_title,
1486                 'ext':      video_extension,
1487                 'format':   video_format,
1488                 'thumbnail':    video_thumbnail,
1489                 'description':  video_description,
1490                 'player_url':   player_url,
1491                 'subtitles':    video_subtitles,
1492                 'duration':     video_duration
1493             })
1494         return results
1495
class YoutubePlaylistIE(InfoExtractor):
    """Extract all entries of a YouTube playlist via the gdata API."""
    IE_DESC = u'YouTube.com playlists'
    _VALID_URL = r"""(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        youtube\.com/
                        (?:
                           (?:course|view_play_list|my_playlists|artist|playlist|watch)
                           \? (?:.*?&)*? (?:p|a|list)=
                        |  p/
                        )
                        ((?:PL|EC|UU|FL)?[0-9A-Za-z-_]{10,})
                        .*
                     |
                        ((?:PL|EC|UU|FL)[0-9A-Za-z-_]{10,})
                     )"""
    _TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/playlists/%s?max-results=%i&start-index=%i&v=2&alt=json&safeSearch=none'
    _MAX_RESULTS = 50
    IE_NAME = u'youtube:playlist'

    @classmethod
    def suitable(cls, url):
        """Receives a URL and returns True if suitable for this IE."""
        # _VALID_URL is written with re.VERBOSE, so the default matcher
        # inherited from InfoExtractor cannot be used here.
        return re.match(cls._VALID_URL, url, re.VERBOSE) is not None

    def _real_extract(self, url):
        # Pull the playlist id out of the URL
        mobj = re.match(self._VALID_URL, url, re.VERBOSE)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        playlist_id = mobj.group(1) or mobj.group(2)

        # Page through the gdata API, collecting (position, watch URL)
        # pairs so the playlist order can be restored afterwards.
        videos = []
        for page_num in itertools.count(1):
            start_index = self._MAX_RESULTS * (page_num - 1) + 1
            if start_index >= 1000:
                self._downloader.report_warning(u'Max number of results reached')
                break
            url = self._TEMPLATE_URL % (playlist_id, self._MAX_RESULTS, start_index)
            page = self._download_webpage(url, playlist_id, u'Downloading page #%s' % page_num)

            try:
                response = json.loads(page)
            except ValueError as err:
                raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))

            if 'feed' not in response:
                raise ExtractorError(u'Got a malformed response from YouTube API')
            playlist_title = response['feed']['title']['$t']
            if 'entry' not in response['feed']:
                # The number of videos was an exact multiple of self._MAX_RESULTS
                break

            for entry in response['feed']['entry']:
                index = entry['yt$position']['$t']
                if 'media$group' in entry and 'yt$videoid' in entry['media$group']:
                    watch_url = 'https://www.youtube.com/watch?v=' + entry['media$group']['yt$videoid']['$t']
                    videos.append((index, watch_url))

        # Sort by playlist position, then drop the position again
        url_results = [self.url_result(vurl, 'Youtube')
                       for (_, vurl) in sorted(videos)]
        return [self.playlist_result(url_results, playlist_id, playlist_title)]
1563
1564
class YoutubeChannelIE(InfoExtractor):
    """Extract all videos of a YouTube channel as a playlist."""
    IE_DESC = u'YouTube.com channels'
    _VALID_URL = r"^(?:https?://)?(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/([0-9A-Za-z_-]+)"
    _TEMPLATE_URL = 'http://www.youtube.com/channel/%s/videos?sort=da&flow=list&view=0&page=%s&gl=US&hl=en'
    _MORE_PAGES_INDICATOR = 'yt-uix-load-more'
    _MORE_PAGES_URL = 'http://www.youtube.com/c4_browse_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s'
    IE_NAME = u'youtube:channel'

    def extract_videos_from_page(self, page):
        """Return the video ids linked from a channel page, in order of
        first appearance and without duplicates."""
        ids_in_page = []
        seen = set()  # O(1) membership test instead of scanning the list
        for mobj in re.finditer(r'href="/watch\?v=([0-9A-Za-z_-]+)&?', page):
            video_id = mobj.group(1)
            if video_id not in seen:
                seen.add(video_id)
                ids_in_page.append(video_id)
        return ids_in_page

    def _real_extract(self, url):
        # Extract channel id
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        # Download the first (plain HTML) channel page
        channel_id = mobj.group(1)
        video_ids = []
        pagenum = 1

        url = self._TEMPLATE_URL % (channel_id, pagenum)
        page = self._download_webpage(url, channel_id,
                                      u'Downloading page #%s' % pagenum)

        # Extract video identifiers
        video_ids.extend(self.extract_videos_from_page(page))

        # Download any subsequent channel pages using the json-based channel_ajax query
        if self._MORE_PAGES_INDICATOR in page:
            for pagenum in itertools.count(1):
                url = self._MORE_PAGES_URL % (pagenum, channel_id)
                page = self._download_webpage(url, channel_id,
                                              u'Downloading page #%s' % pagenum)

                page = json.loads(page)

                video_ids.extend(self.extract_videos_from_page(page['content_html']))

                # The "load more" widget only advertises the indicator
                # while further pages exist
                if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']:
                    break

        self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))

        urls = ['http://www.youtube.com/watch?v=%s' % video_id for video_id in video_ids]
        url_entries = [self.url_result(eurl, 'Youtube') for eurl in urls]
        return [self.playlist_result(url_entries, channel_id)]
1619
1620
class YoutubeUserIE(InfoExtractor):
    """Extract every upload of a YouTube user as a playlist."""
    IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?)|ytuser:)(?!feed/)([A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
    _GDATA_PAGE_SIZE = 50
    _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'
    IE_NAME = u'youtube:user'

    @classmethod
    def suitable(cls, url):
        # Our regex is very permissive, so defer to any other youtube
        # extractor that also matches this URL.
        other_ies = iter(klass for (name, klass) in globals().items()
                         if name.endswith('IE') and klass is not cls)
        if any(ie.suitable(url) for ie in other_ies):
            return False
        return super(YoutubeUserIE, cls).suitable(url)

    def _real_extract(self, url):
        # Pull the username out of the URL
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        username = mobj.group(1)

        # The gdata API caps every response (currently at 50 videos), so
        # keep requesting pages until one comes back empty or short.
        video_ids = []
        for pagenum in itertools.count(0):
            start_index = pagenum * self._GDATA_PAGE_SIZE + 1

            gdata_url = self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index)
            page = self._download_webpage(gdata_url, username,
                                          u'Downloading video ids from %d to %d' % (start_index, start_index + self._GDATA_PAGE_SIZE))

            try:
                response = json.loads(page)
            except ValueError as err:
                raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))
            if 'entry' not in response['feed']:
                # The number of videos was an exact multiple of the page size
                break

            # Each entry id looks like ".../<video_id>"; keep the tail.
            ids_in_page = [entry['id']['$t'].split('/')[-1]
                           for entry in response['feed']['entry']]
            video_ids.extend(ids_in_page)

            # A page that is not "full" must be the last one - there are
            # no more ids on further pages, so there is no need to query
            # the API again.
            if len(ids_in_page) < self._GDATA_PAGE_SIZE:
                break

        urls = ['http://www.youtube.com/watch?v=%s' % video_id for video_id in video_ids]
        url_results = [self.url_result(rurl, 'Youtube') for rurl in urls]
        return [self.playlist_result(url_results, playlist_title=username)]
1685
class YoutubeSearchIE(SearchInfoExtractor):
    """Search YouTube via the gdata API ("ytsearchN:query")."""
    IE_DESC = u'YouTube.com searches'
    _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc'
    _MAX_RESULTS = 1000
    IE_NAME = u'youtube:search'
    _SEARCH_KEY = 'ytsearch'

    def report_download_page(self, query, pagenum):
        """Report attempt to download search page with given number."""
        self._downloader.to_screen(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query.

        Pages through the API (50 results per page) until either n ids
        have been collected or the API reports no further results.
        """
        video_ids = []
        pagenum = 0
        limit = n

        while (50 * pagenum) < limit:
            self.report_download_page(query, pagenum + 1)
            result_url = self._API_URL % (compat_urllib_parse.quote_plus(query), (50 * pagenum) + 1)
            request = compat_urllib_request.Request(result_url)
            try:
                data = compat_urllib_request.urlopen(request).read().decode('utf-8')
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                raise ExtractorError(u'Unable to download API page: %s' % compat_str(err))
            api_response = json.loads(data)['data']

            if 'items' not in api_response:
                raise ExtractorError(u'[youtube] No video results')

            video_ids += [video['id'] for video in api_response['items']]

            # The API may report fewer total results than we asked for;
            # shrink the limit so the loop stops once they are exhausted.
            limit = min(n, api_response['totalItems'])
            pagenum += 1

        if len(video_ids) > n:
            video_ids = video_ids[:n]
        videos = [self.url_result('http://www.youtube.com/watch?v=%s' % video_id, 'Youtube')
                  for video_id in video_ids]
        return self.playlist_result(videos, query)
1727
1728
class YoutubeShowIE(InfoExtractor):
    """Extract all seasons of a YouTube show as playlist results."""
    IE_DESC = u'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://www\.youtube\.com/show/(.*)'
    IE_NAME = u'youtube:show'

    def _real_extract(self, url):
        show_name = re.match(self._VALID_URL, url).group(1)
        webpage = self._download_webpage(url, show_name, u'Downloading show webpage')
        # Each season of the show is published as a separate playlist
        seasons = list(re.finditer(r'href="(/playlist\?list=.*?)"', webpage))
        self.to_screen(u'%s: Found %s seasons' % (show_name, len(seasons)))
        return [self.url_result('https://www.youtube.com' + season.group(1), 'YoutubePlaylist')
                for season in seasons]
1742
1743
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Base class for extractors that fetch info from
    http://www.youtube.com/feed_ajax
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
    """
    _LOGIN_REQUIRED = True
    _PAGING_STEP = 30
    # use action_load_personal_feed instead of action_load_system_feed
    _PERSONAL_FEED = False

    @property
    def _FEED_TEMPLATE(self):
        # The escaped %%s survives this interpolation so that the paging
        # offset can be filled in later by _real_extract.
        if self._PERSONAL_FEED:
            action = 'action_load_personal_feed'
        else:
            action = 'action_load_system_feed'
        return 'http://www.youtube.com/feed_ajax?%s=1&feed_name=%s&paging=%%s' % (action, self._FEED_NAME)

    @property
    def IE_NAME(self):
        return u'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        # These feeds are only served to authenticated users
        self._login()

    def _real_extract(self, url):
        feed_entries = []
        # itertools.count is used because range()'s step argument is
        # only available in 2.7 or higher
        for page_idx in itertools.count(0):
            offset = page_idx * self._PAGING_STEP
            info = json.loads(self._download_webpage(
                self._FEED_TEMPLATE % offset,
                u'%s feed' % self._FEED_NAME,
                u'Downloading page %s' % page_idx))
            feed_html = info['feed_html']
            matches = re.finditer(r'"/watch\?v=(.*?)["&]', feed_html)
            ids = orderedSet(m.group(1) for m in matches)
            feed_entries.extend(self.url_result(vid, 'Youtube') for vid in ids)
            # A null paging token marks the last page of the feed
            if info['paging'] is None:
                break
        return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE)
1785
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Extract the authenticated user's subscriptions feed."""
    IE_DESC = u'YouTube.com subscriptions feed, "ytsubs" keyword(requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = u'Youtube Subscriptions'
1791
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Extract the authenticated user's recommended-videos feed."""
    IE_DESC = u'YouTube.com recommended videos, "ytrec" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?'
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = u'Youtube Recommended videos'
1797
class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor):
    """Extract the authenticated user's "Watch Later" feed."""
    IE_DESC = u'Youtube watch later list, "ytwatchlater" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/watch_later|:ytwatchlater'
    _FEED_NAME = 'watch_later'
    _PLAYLIST_TITLE = u'Youtube Watch Later'
    # Watch-later pages are larger than the default feed page size
    _PAGING_STEP = 100
    # This feed is per-user, so it needs action_load_personal_feed
    _PERSONAL_FEED = True
1805
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Resolve the authenticated user's favourites to their playlist."""
    IE_NAME = u'youtube:favorites'
    IE_DESC = u'YouTube.com favourite videos, "ytfav" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
    _LOGIN_REQUIRED = True

    def _real_extract(self, url):
        favourites_page = self._download_webpage(
            'https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        # The page embeds the id of the playlist backing the favourites
        # feed; delegate the actual extraction to YoutubePlaylistIE.
        playlist_id = self._search_regex(
            r'list=(.+?)["&]', favourites_page, u'favourites playlist id')
        return self.url_result(playlist_id, 'YoutubePlaylist')