Merge pull request #1622 from rbrito/fix-extension
[youtube-dl] / youtube_dl / extractor / youtube.py
1 # coding: utf-8
2
3 import collections
4 import errno
5 import io
6 import itertools
7 import json
8 import os.path
9 import re
10 import socket
11 import string
12 import struct
13 import traceback
14 import xml.etree.ElementTree
15 import zlib
16
17 from .common import InfoExtractor, SearchInfoExtractor
18 from .subtitles import SubtitlesInfoExtractor
19 from ..utils import (
20     compat_chr,
21     compat_http_client,
22     compat_parse_qs,
23     compat_urllib_error,
24     compat_urllib_parse,
25     compat_urllib_request,
26     compat_urlparse,
27     compat_str,
28
29     clean_html,
30     get_cachedir,
31     get_element_by_id,
32     ExtractorError,
33     unescapeHTML,
34     unified_strdate,
35     orderedSet,
36     write_json_file,
37 )
38
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _LANG_URL = r'https://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
    _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    def report_lang(self):
        """Report attempt to set language."""
        self.to_screen(u'Setting language')

    def _set_language(self):
        """Request English-language pages from YouTube.

        Returns True on success, False (after a warning) on network errors.
        """
        request = compat_urllib_request.Request(self._LANG_URL)
        try:
            self.report_lang()
            compat_urllib_request.urlopen(request).read()
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self._downloader.report_warning(u'unable to set language: %s' % compat_str(err))
            return False
        return True

    def _login(self):
        """Log in to YouTube with the configured credentials.

        Returns True if login succeeded, False otherwise.
        Raises ExtractorError when credentials are missing but
        _LOGIN_REQUIRED is set.
        """
        (username, password) = self._get_login_info()
        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED:
                raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            return False

        request = compat_urllib_request.Request(self._LOGIN_URL)
        try:
            login_page = compat_urllib_request.urlopen(request).read().decode('utf-8')
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self._downloader.report_warning(u'unable to fetch login page: %s' % compat_str(err))
            return False

        # Hidden anti-forgery tokens embedded in the login form.
        galx = None
        dsh = None
        match = re.search(re.compile(r'<input.+?name="GALX".+?value="(.+?)"', re.DOTALL), login_page)
        if match:
            galx = match.group(1)
        match = re.search(re.compile(r'<input.+?name="dsh".+?value="(.+?)"', re.DOTALL), login_page)
        if match:
            dsh = match.group(1)
        if galx is None or dsh is None:
            # A missing token used to reach the form below as None and crash
            # on .encode('utf-8'); fail with a warning instead.
            self._downloader.report_warning(u'unable to log in: could not find login form tokens')
            return False

        # Log in
        login_form_strs = {
                u'continue': u'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
                u'Email': username,
                u'GALX': galx,
                u'Passwd': password,
                u'PersistentCookie': u'yes',
                u'_utf8': u'霱',
                u'bgresponse': u'js_disabled',
                u'checkConnection': u'',
                u'checkedDomains': u'youtube',
                u'dnConn': u'',
                u'dsh': dsh,
                u'pstMsg': u'0',
                u'rmShown': u'1',
                u'secTok': u'',
                u'signIn': u'Sign in',
                u'timeStmp': u'',
                u'service': u'youtube',
                u'uilel': u'3',
                u'hl': u'en_US',
        }
        # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
        # chokes on unicode
        login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k,v in login_form_strs.items())
        login_data = compat_urllib_parse.urlencode(login_form).encode('ascii')
        request = compat_urllib_request.Request(self._LOGIN_URL, login_data)
        try:
            self.report_login()
            login_results = compat_urllib_request.urlopen(request).read().decode('utf-8')
            # If the login form is served again, the credentials were rejected.
            if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
                self._downloader.report_warning(u'unable to log in: bad username or password')
                return False
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self._downloader.report_warning(u'unable to log in: %s' % compat_str(err))
            return False
        return True

    def _confirm_age(self):
        """Submit the age-confirmation form.

        Returns True on success; raises ExtractorError on network errors.
        """
        age_form = {
            'next_url': '/',
            'action_confirm': 'Confirm',
        }
        # Encode the body to bytes: Python 3's urlopen rejects str request
        # bodies (the login path above already does this).
        request = compat_urllib_request.Request(
            self._AGE_URL, compat_urllib_parse.urlencode(age_form).encode('ascii'))
        try:
            self.report_age_confirmation()
            compat_urllib_request.urlopen(request).read().decode('utf-8')
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            raise ExtractorError(u'Unable to confirm age: %s' % compat_str(err))
        return True

    def _real_initialize(self):
        """Set language, log in and confirm age before extraction starts."""
        if self._downloader is None:
            return
        if not self._set_language():
            return
        if not self._login():
            return
        self._confirm_age()
146
class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
    IE_DESC = u'YouTube.com'
    # Verbose-mode pattern (matched with re.VERBOSE in suitable()). Group 1
    # captures everything preceding the video ID and is optional, so a bare
    # 11-character ID is also accepted; the final group is the video ID.
    _VALID_URL = r"""^
                     (
                         (?:https?://)?                                       # http(s):// (optional)
                         (?:(?:(?:(?:\w+\.)?youtube(?:-nocookie)?\.com/|
                            tube\.majestyc\.net/|
                            youtube\.googleapis\.com/)                        # the various hostnames, with wildcard subdomains
                         (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
                         (?:                                                  # the various things that can precede the ID:
                             (?:(?:v|embed|e)/)                               # v/ or embed/ or e/
                             |(?:                                             # or the v= param in all its forms
                                 (?:(?:watch|movie)(?:_popup)?(?:\.php)?)?    # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
                                 (?:\?|\#!?)                                  # the params delimiter ? or # or #!
                                 (?:.*?&)?                                    # any other preceding param (like /?s=tuff&v=xxxx)
                                 v=
                             )
                         ))
                         |youtu\.be/                                          # just youtu.be/xxxx
                         )
                     )?                                                       # all until now is optional -> you can pass the naked ID
                     ([0-9A-Za-z_-]{11})                                      # here is it! the YouTube video ID
                     (?(1).+)?                                                # if we found the ID, everything can follow
                     $"""
    # Captures the target of a next_url query parameter (verify_age redirects).
    _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
    # itags listed in order of quality, best first
    _available_formats = ['38', '37', '46', '22', '45', '35', '44', '34', '18', '43', '6', '5', '36', '17', '13',
                          # Apple HTTP Live Streaming
                          '96', '95', '94', '93', '92', '132', '151',
                          # 3D
                          '85', '84', '102', '83', '101', '82', '100',
                          # Dash video
                          '138', '137', '248', '136', '247', '135', '246',
                          '245', '244', '134', '243', '133', '242', '160',
                          # Dash audio
                          '141', '172', '140', '171', '139',
                          ]
    # Same ordering, but free (WebM) formats ranked above equivalent MP4 ones
    _available_formats_prefer_free = ['38', '46', '37', '45', '22', '44', '35', '43', '34', '18', '6', '5', '36', '17', '13',
                                      # Apple HTTP Live Streaming
                                      '96', '95', '94', '93', '92', '132', '151',
                                      # 3D
                                      '85', '102', '84', '101', '83', '100', '82',
                                      # Dash video
                                      '138', '248', '137', '247', '136', '246', '245',
                                      '244', '135', '243', '134', '242', '133', '160',
                                      # Dash audio
                                      '172', '141', '171', '140', '139',
                                      ]
    # Container name -> itags using that container, best quality first
    _video_formats_map = {
        'flv': ['35', '34', '6', '5'],
        '3gp': ['36', '17', '13'],
        'mp4': ['38', '37', '22', '18'],
        'webm': ['46', '45', '44', '43'],
    }
    # itag -> file extension for the downloaded media
    _video_extensions = {
        '13': '3gp',
        '17': '3gp',
        '18': 'mp4',
        '22': 'mp4',
        '36': '3gp',
        '37': 'mp4',
        '38': 'mp4',
        '43': 'webm',
        '44': 'webm',
        '45': 'webm',
        '46': 'webm',

        # 3d videos
        '82': 'mp4',
        '83': 'mp4',
        '84': 'mp4',
        '85': 'mp4',
        '100': 'webm',
        '101': 'webm',
        '102': 'webm',

        # Apple HTTP Live Streaming
        '92': 'mp4',
        '93': 'mp4',
        '94': 'mp4',
        '95': 'mp4',
        '96': 'mp4',
        '132': 'mp4',
        '151': 'mp4',

        # Dash mp4
        '133': 'mp4',
        '134': 'mp4',
        '135': 'mp4',
        '136': 'mp4',
        '137': 'mp4',
        '138': 'mp4',
        '160': 'mp4',

        # Dash mp4 audio
        '139': 'm4a',
        '140': 'm4a',
        '141': 'm4a',

        # Dash webm
        '171': 'webm',
        '172': 'webm',
        '242': 'webm',
        '243': 'webm',
        '244': 'webm',
        '245': 'webm',
        '246': 'webm',
        '247': 'webm',
        '248': 'webm',
    }
    # itag -> human-readable resolution / bitrate label for display.
    # NOTE(review): pixel entries appear to be height x width (e.g. '22' is
    # '720x1280'); audio itags carry bitrate labels instead — confirm before
    # parsing these strings programmatically.
    _video_dimensions = {
        '5': '240x400',
        '6': '???',
        '13': '???',
        '17': '144x176',
        '18': '360x640',
        '22': '720x1280',
        '34': '360x640',
        '35': '480x854',
        '36': '240x320',
        '37': '1080x1920',
        '38': '3072x4096',
        '43': '360x640',
        '44': '480x854',
        '45': '720x1280',
        '46': '1080x1920',
        '82': '360p',
        '83': '480p',
        '84': '720p',
        '85': '1080p',
        '92': '240p',
        '93': '360p',
        '94': '480p',
        '95': '720p',
        '96': '1080p',
        '100': '360p',
        '101': '480p',
        '102': '720p',
        '132': '240p',
        '151': '72p',
        '133': '240p',
        '134': '360p',
        '135': '480p',
        '136': '720p',
        '137': '1080p',
        '138': '>1080p',
        '139': '48k',
        '140': '128k',
        '141': '256k',
        '160': '192p',
        '171': '128k',
        '172': '256k',
        '242': '240p',
        '243': '360p',
        '244': '480p',
        '245': '480p',
        '246': '480p',
        '247': '720p',
        '248': '1080p',
    }
    # itag -> extra format annotation (3D or DASH stream type)
    _special_itags = {
        '82': '3D',
        '83': '3D',
        '84': '3D',
        '85': '3D',
        '100': '3D',
        '101': '3D',
        '102': '3D',
        '133': 'DASH Video',
        '134': 'DASH Video',
        '135': 'DASH Video',
        '136': 'DASH Video',
        '137': 'DASH Video',
        '138': 'DASH Video',
        '139': 'DASH Audio',
        '140': 'DASH Audio',
        '141': 'DASH Audio',
        '160': 'DASH Video',
        '171': 'DASH Audio',
        '172': 'DASH Audio',
        '242': 'DASH Video',
        '243': 'DASH Video',
        '244': 'DASH Video',
        '245': 'DASH Video',
        '246': 'DASH Video',
        '247': 'DASH Video',
        '248': 'DASH Video',
    }
335
    IE_NAME = u'youtube'
    # Test vectors: URL plus expected file name and metadata for known videos.
    # Presumably consumed by the project's download test harness — verify.
    _TESTS = [
        {
            u"url":  u"http://www.youtube.com/watch?v=BaW_jenozKc",
            u"file":  u"BaW_jenozKc.mp4",
            u"info_dict": {
                u"title": u"youtube-dl test video \"'/\\ä↭𝕐",
                u"uploader": u"Philipp Hagemeister",
                u"uploader_id": u"phihag",
                u"upload_date": u"20121002",
                u"description": u"test chars:  \"'/\\ä↭𝕐\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de ."
            }
        },
        {
            u"url":  u"http://www.youtube.com/watch?v=1ltcDfZMA3U",
            u"file":  u"1ltcDfZMA3U.flv",
            u"note": u"Test VEVO video (#897)",
            u"info_dict": {
                u"upload_date": u"20070518",
                u"title": u"Maps - It Will Find You",
                u"description": u"Music video by Maps performing It Will Find You.",
                u"uploader": u"MuteUSA",
                u"uploader_id": u"MuteUSA"
            }
        },
        {
            u"url":  u"http://www.youtube.com/watch?v=UxxajLWwzqY",
            u"file":  u"UxxajLWwzqY.mp4",
            u"note": u"Test generic use_cipher_signature video (#897)",
            u"info_dict": {
                u"upload_date": u"20120506",
                u"title": u"Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]",
                u"description": u"md5:5b292926389560516e384ac437c0ec07",
                u"uploader": u"Icona Pop",
                u"uploader_id": u"IconaPop"
            }
        },
        {
            u"url":  u"https://www.youtube.com/watch?v=07FYdnEawAQ",
            u"file":  u"07FYdnEawAQ.mp4",
            u"note": u"Test VEVO video with age protection (#956)",
            u"info_dict": {
                u"upload_date": u"20130703",
                u"title": u"Justin Timberlake - Tunnel Vision (Explicit)",
                u"description": u"md5:64249768eec3bc4276236606ea996373",
                u"uploader": u"justintimberlakeVEVO",
                u"uploader_id": u"justintimberlakeVEVO"
            }
        },
    ]
386
387
388     @classmethod
389     def suitable(cls, url):
390         """Receives a URL and returns True if suitable for this IE."""
391         if YoutubePlaylistIE.suitable(url): return False
392         return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
393
394     def __init__(self, *args, **kwargs):
395         super(YoutubeIE, self).__init__(*args, **kwargs)
396         self._player_cache = {}
397
398     def report_video_webpage_download(self, video_id):
399         """Report attempt to download video webpage."""
400         self.to_screen(u'%s: Downloading video webpage' % video_id)
401
402     def report_video_info_webpage_download(self, video_id):
403         """Report attempt to download video info webpage."""
404         self.to_screen(u'%s: Downloading video info webpage' % video_id)
405
406     def report_information_extraction(self, video_id):
407         """Report attempt to extract video information."""
408         self.to_screen(u'%s: Extracting video information' % video_id)
409
410     def report_unavailable_format(self, video_id, format):
411         """Report extracted video URL."""
412         self.to_screen(u'%s: Format %s not available' % (video_id, format))
413
414     def report_rtmp_download(self):
415         """Indicate the download will use the RTMP protocol."""
416         self.to_screen(u'RTMP download detected')
417
    def _extract_signature_function(self, video_id, player_url, slen):
        """Return a callable that decrypts a signature of length slen.

        The function is parsed from the JS or SWF player at player_url and,
        when a cache directory is configured, memoized on disk as the list of
        source-character indices it produces.
        """
        # Player URLs end in "-<id>.<ext>"; ext selects the parser below.
        id_m = re.match(r'.*-(?P<id>[a-zA-Z0-9_-]+)\.(?P<ext>[a-z]+)$',
                        player_url)
        player_type = id_m.group('ext')
        player_id = id_m.group('id')

        # Read from filesystem cache
        func_id = '%s_%s_%d' % (player_type, player_id, slen)
        # Guard against path components sneaking into the cache file name.
        assert os.path.basename(func_id) == func_id
        cache_dir = get_cachedir(self._downloader.params)

        cache_enabled = cache_dir is not None
        if cache_enabled:
            cache_fn = os.path.join(os.path.expanduser(cache_dir),
                                    u'youtube-sigfuncs',
                                    func_id + '.json')
            try:
                with io.open(cache_fn, 'r', encoding='utf-8') as cachef:
                    cache_spec = json.load(cachef)
                # The cached spec is a permutation: output char i comes from
                # input position cache_spec[i].
                return lambda s: u''.join(s[i] for i in cache_spec)
            except IOError:
                pass  # No cache available

        if player_type == 'js':
            code = self._download_webpage(
                player_url, video_id,
                note=u'Downloading %s player %s' % (player_type, player_id),
                errnote=u'Download of %s failed' % player_url)
            res = self._parse_sig_js(code)
        elif player_type == 'swf':
            urlh = self._request_webpage(
                player_url, video_id,
                note=u'Downloading %s player %s' % (player_type, player_id),
                errnote=u'Download of %s failed' % player_url)
            code = urlh.read()
            res = self._parse_sig_swf(code)
        else:
            assert False, 'Invalid player type %r' % player_type

        if cache_enabled:
            # Derive the index permutation by running the freshly parsed
            # function on an identity string, then persist it. Cache-write
            # failures are non-fatal: warn and return the function anyway.
            try:
                test_string = u''.join(map(compat_chr, range(slen)))
                cache_res = res(test_string)
                cache_spec = [ord(c) for c in cache_res]
                try:
                    os.makedirs(os.path.dirname(cache_fn))
                except OSError as ose:
                    if ose.errno != errno.EEXIST:
                        raise
                write_json_file(cache_spec, cache_fn)
            except Exception:
                tb = traceback.format_exc()
                self._downloader.report_warning(
                    u'Writing cache to %r failed: %s' % (cache_fn, tb))

        return res
474
475     def _print_sig_code(self, func, slen):
476         def gen_sig_code(idxs):
477             def _genslice(start, end, step):
478                 starts = u'' if start == 0 else str(start)
479                 ends = (u':%d' % (end+step)) if end + step >= 0 else u':'
480                 steps = u'' if step == 1 else (u':%d' % step)
481                 return u's[%s%s%s]' % (starts, ends, steps)
482
483             step = None
484             start = '(Never used)'  # Quelch pyflakes warnings - start will be
485                                     # set as soon as step is set
486             for i, prev in zip(idxs[1:], idxs[:-1]):
487                 if step is not None:
488                     if i - prev == step:
489                         continue
490                     yield _genslice(start, prev, step)
491                     step = None
492                     continue
493                 if i - prev in [-1, 1]:
494                     step = i - prev
495                     start = prev
496                     continue
497                 else:
498                     yield u's[%d]' % prev
499             if step is None:
500                 yield u's[%d]' % i
501             else:
502                 yield _genslice(start, i, step)
503
504         test_string = u''.join(map(compat_chr, range(slen)))
505         cache_res = func(test_string)
506         cache_spec = [ord(c) for c in cache_res]
507         expr_code = u' + '.join(gen_sig_code(cache_spec))
508         code = u'if len(s) == %d:\n    return %s\n' % (slen, expr_code)
509         self.to_screen(u'Extracted signature function:\n' + code)
510
511     def _parse_sig_js(self, jscode):
512         funcname = self._search_regex(
513             r'signature=([a-zA-Z]+)', jscode,
514             u'Initial JS player signature function name')
515
516         functions = {}
517
518         def argidx(varname):
519             return string.lowercase.index(varname)
520
521         def interpret_statement(stmt, local_vars, allow_recursion=20):
522             if allow_recursion < 0:
523                 raise ExtractorError(u'Recursion limit reached')
524
525             if stmt.startswith(u'var '):
526                 stmt = stmt[len(u'var '):]
527             ass_m = re.match(r'^(?P<out>[a-z]+)(?:\[(?P<index>[^\]]+)\])?' +
528                              r'=(?P<expr>.*)$', stmt)
529             if ass_m:
530                 if ass_m.groupdict().get('index'):
531                     def assign(val):
532                         lvar = local_vars[ass_m.group('out')]
533                         idx = interpret_expression(ass_m.group('index'),
534                                                    local_vars, allow_recursion)
535                         assert isinstance(idx, int)
536                         lvar[idx] = val
537                         return val
538                     expr = ass_m.group('expr')
539                 else:
540                     def assign(val):
541                         local_vars[ass_m.group('out')] = val
542                         return val
543                     expr = ass_m.group('expr')
544             elif stmt.startswith(u'return '):
545                 assign = lambda v: v
546                 expr = stmt[len(u'return '):]
547             else:
548                 raise ExtractorError(
549                     u'Cannot determine left side of statement in %r' % stmt)
550
551             v = interpret_expression(expr, local_vars, allow_recursion)
552             return assign(v)
553
554         def interpret_expression(expr, local_vars, allow_recursion):
555             if expr.isdigit():
556                 return int(expr)
557
558             if expr.isalpha():
559                 return local_vars[expr]
560
561             m = re.match(r'^(?P<in>[a-z]+)\.(?P<member>.*)$', expr)
562             if m:
563                 member = m.group('member')
564                 val = local_vars[m.group('in')]
565                 if member == 'split("")':
566                     return list(val)
567                 if member == 'join("")':
568                     return u''.join(val)
569                 if member == 'length':
570                     return len(val)
571                 if member == 'reverse()':
572                     return val[::-1]
573                 slice_m = re.match(r'slice\((?P<idx>.*)\)', member)
574                 if slice_m:
575                     idx = interpret_expression(
576                         slice_m.group('idx'), local_vars, allow_recursion-1)
577                     return val[idx:]
578
579             m = re.match(
580                 r'^(?P<in>[a-z]+)\[(?P<idx>.+)\]$', expr)
581             if m:
582                 val = local_vars[m.group('in')]
583                 idx = interpret_expression(m.group('idx'), local_vars,
584                                            allow_recursion-1)
585                 return val[idx]
586
587             m = re.match(r'^(?P<a>.+?)(?P<op>[%])(?P<b>.+?)$', expr)
588             if m:
589                 a = interpret_expression(m.group('a'),
590                                          local_vars, allow_recursion)
591                 b = interpret_expression(m.group('b'),
592                                          local_vars, allow_recursion)
593                 return a % b
594
595             m = re.match(
596                 r'^(?P<func>[a-zA-Z]+)\((?P<args>[a-z0-9,]+)\)$', expr)
597             if m:
598                 fname = m.group('func')
599                 if fname not in functions:
600                     functions[fname] = extract_function(fname)
601                 argvals = [int(v) if v.isdigit() else local_vars[v]
602                            for v in m.group('args').split(',')]
603                 return functions[fname](argvals)
604             raise ExtractorError(u'Unsupported JS expression %r' % expr)
605
606         def extract_function(funcname):
607             func_m = re.search(
608                 r'function ' + re.escape(funcname) +
609                 r'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}',
610                 jscode)
611             argnames = func_m.group('args').split(',')
612
613             def resf(args):
614                 local_vars = dict(zip(argnames, args))
615                 for stmt in func_m.group('code').split(';'):
616                     res = interpret_statement(stmt, local_vars)
617                 return res
618             return resf
619
620         initial_function = extract_function(funcname)
621         return lambda s: initial_function([s])
622
623     def _parse_sig_swf(self, file_contents):
624         if file_contents[1:3] != b'WS':
625             raise ExtractorError(
626                 u'Not an SWF file; header is %r' % file_contents[:3])
627         if file_contents[:1] == b'C':
628             content = zlib.decompress(file_contents[8:])
629         else:
630             raise NotImplementedError(u'Unsupported compression format %r' %
631                                       file_contents[:1])
632
633         def extract_tags(content):
634             pos = 0
635             while pos < len(content):
636                 header16 = struct.unpack('<H', content[pos:pos+2])[0]
637                 pos += 2
638                 tag_code = header16 >> 6
639                 tag_len = header16 & 0x3f
640                 if tag_len == 0x3f:
641                     tag_len = struct.unpack('<I', content[pos:pos+4])[0]
642                     pos += 4
643                 assert pos+tag_len <= len(content)
644                 yield (tag_code, content[pos:pos+tag_len])
645                 pos += tag_len
646
647         code_tag = next(tag
648                         for tag_code, tag in extract_tags(content)
649                         if tag_code == 82)
650         p = code_tag.index(b'\0', 4) + 1
651         code_reader = io.BytesIO(code_tag[p:])
652
653         # Parse ABC (AVM2 ByteCode)
654         def read_int(reader=None):
655             if reader is None:
656                 reader = code_reader
657             res = 0
658             shift = 0
659             for _ in range(5):
660                 buf = reader.read(1)
661                 assert len(buf) == 1
662                 b = struct.unpack('<B', buf)[0]
663                 res = res | ((b & 0x7f) << shift)
664                 if b & 0x80 == 0:
665                     break
666                 shift += 7
667             return res
668
669         def u30(reader=None):
670             res = read_int(reader)
671             assert res & 0xf0000000 == 0
672             return res
673         u32 = read_int
674
675         def s32(reader=None):
676             v = read_int(reader)
677             if v & 0x80000000 != 0:
678                 v = - ((v ^ 0xffffffff) + 1)
679             return v
680
681         def read_string(reader=None):
682             if reader is None:
683                 reader = code_reader
684             slen = u30(reader)
685             resb = reader.read(slen)
686             assert len(resb) == slen
687             return resb.decode('utf-8')
688
689         def read_bytes(count, reader=None):
690             if reader is None:
691                 reader = code_reader
692             resb = reader.read(count)
693             assert len(resb) == count
694             return resb
695
696         def read_byte(reader=None):
697             resb = read_bytes(1, reader=reader)
698             res = struct.unpack('<B', resb)[0]
699             return res
700
701         # minor_version + major_version
702         read_bytes(2 + 2)
703
704         # Constant pool
705         int_count = u30()
706         for _c in range(1, int_count):
707             s32()
708         uint_count = u30()
709         for _c in range(1, uint_count):
710             u32()
711         double_count = u30()
712         read_bytes((double_count-1) * 8)
713         string_count = u30()
714         constant_strings = [u'']
715         for _c in range(1, string_count):
716             s = read_string()
717             constant_strings.append(s)
718         namespace_count = u30()
719         for _c in range(1, namespace_count):
720             read_bytes(1)  # kind
721             u30()  # name
722         ns_set_count = u30()
723         for _c in range(1, ns_set_count):
724             count = u30()
725             for _c2 in range(count):
726                 u30()
727         multiname_count = u30()
728         MULTINAME_SIZES = {
729             0x07: 2,  # QName
730             0x0d: 2,  # QNameA
731             0x0f: 1,  # RTQName
732             0x10: 1,  # RTQNameA
733             0x11: 0,  # RTQNameL
734             0x12: 0,  # RTQNameLA
735             0x09: 2,  # Multiname
736             0x0e: 2,  # MultinameA
737             0x1b: 1,  # MultinameL
738             0x1c: 1,  # MultinameLA
739         }
740         multinames = [u'']
741         for _c in range(1, multiname_count):
742             kind = u30()
743             assert kind in MULTINAME_SIZES, u'Invalid multiname kind %r' % kind
744             if kind == 0x07:
745                 u30()  # namespace_idx
746                 name_idx = u30()
747                 multinames.append(constant_strings[name_idx])
748             else:
749                 multinames.append('[MULTINAME kind: %d]' % kind)
750                 for _c2 in range(MULTINAME_SIZES[kind]):
751                     u30()
752
753         # Methods
754         method_count = u30()
755         MethodInfo = collections.namedtuple(
756             'MethodInfo',
757             ['NEED_ARGUMENTS', 'NEED_REST'])
758         method_infos = []
759         for method_id in range(method_count):
760             param_count = u30()
761             u30()  # return type
762             for _ in range(param_count):
763                 u30()  # param type
764             u30()  # name index (always 0 for youtube)
765             flags = read_byte()
766             if flags & 0x08 != 0:
767                 # Options present
768                 option_count = u30()
769                 for c in range(option_count):
770                     u30()  # val
771                     read_bytes(1)  # kind
772             if flags & 0x80 != 0:
773                 # Param names present
774                 for _ in range(param_count):
775                     u30()  # param name
776             mi = MethodInfo(flags & 0x01 != 0, flags & 0x04 != 0)
777             method_infos.append(mi)
778
779         # Metadata
780         metadata_count = u30()
781         for _c in range(metadata_count):
782             u30()  # name
783             item_count = u30()
784             for _c2 in range(item_count):
785                 u30()  # key
786                 u30()  # value
787
        def parse_traits_info():
            """Parse one traits_info record from the ABC stream.

            Consumes the record through the shared u30/read_byte closures and
            returns a dict of the method-related traits found: for
            Method/Getter/Setter traits it maps trait name -> method index;
            for Function traits the mapping is function index -> trait name
            (note the reversed key/value order in that branch).
            """
            trait_name_idx = u30()
            # Low nibble encodes the trait kind, high nibble the attributes.
            kind_full = read_byte()
            kind = kind_full & 0x0f
            attrs = kind_full >> 4
            methods = {}
            if kind in [0x00, 0x06]:  # Slot or Const
                u30()  # Slot id
                u30()  # type_name_idx
                vindex = u30()
                if vindex != 0:
                    read_byte()  # vkind
            elif kind in [0x01, 0x02, 0x03]:  # Method / Getter / Setter
                u30()  # disp_id
                method_idx = u30()
                methods[multinames[trait_name_idx]] = method_idx
            elif kind == 0x04:  # Class
                u30()  # slot_id
                u30()  # classi
            elif kind == 0x05:  # Function
                u30()  # slot_id
                function_idx = u30()
                methods[function_idx] = multinames[trait_name_idx]
            else:
                raise ExtractorError(u'Unsupported trait kind %d' % kind)

            if attrs & 0x4 != 0:  # Metadata present
                metadata_count = u30()
                for _c3 in range(metadata_count):
                    u30()  # metadata index

            return methods
820
821         # Classes
822         TARGET_CLASSNAME = u'SignatureDecipher'
823         searched_idx = multinames.index(TARGET_CLASSNAME)
824         searched_class_id = None
825         class_count = u30()
826         for class_id in range(class_count):
827             name_idx = u30()
828             if name_idx == searched_idx:
829                 # We found the class we're looking for!
830                 searched_class_id = class_id
831             u30()  # super_name idx
832             flags = read_byte()
833             if flags & 0x08 != 0:  # Protected namespace is present
834                 u30()  # protected_ns_idx
835             intrf_count = u30()
836             for _c2 in range(intrf_count):
837                 u30()
838             u30()  # iinit
839             trait_count = u30()
840             for _c2 in range(trait_count):
841                 parse_traits_info()
842
843         if searched_class_id is None:
844             raise ExtractorError(u'Target class %r not found' %
845                                  TARGET_CLASSNAME)
846
847         method_names = {}
848         method_idxs = {}
849         for class_id in range(class_count):
850             u30()  # cinit
851             trait_count = u30()
852             for _c2 in range(trait_count):
853                 trait_methods = parse_traits_info()
854                 if class_id == searched_class_id:
855                     method_names.update(trait_methods.items())
856                     method_idxs.update(dict(
857                         (idx, name)
858                         for name, idx in trait_methods.items()))
859
860         # Scripts
861         script_count = u30()
862         for _c in range(script_count):
863             u30()  # init
864             trait_count = u30()
865             for _c2 in range(trait_count):
866                 parse_traits_info()
867
868         # Method bodies
869         method_body_count = u30()
870         Method = collections.namedtuple('Method', ['code', 'local_count'])
871         methods = {}
872         for _c in range(method_body_count):
873             method_idx = u30()
874             u30()  # max_stack
875             local_count = u30()
876             u30()  # init_scope_depth
877             u30()  # max_scope_depth
878             code_length = u30()
879             code = read_bytes(code_length)
880             if method_idx in method_idxs:
881                 m = Method(code, local_count)
882                 methods[method_idxs[method_idx]] = m
883             exception_count = u30()
884             for _c2 in range(exception_count):
885                 u30()  # from
886                 u30()  # to
887                 u30()  # target
888                 u30()  # exc_type
889                 u30()  # var_name
890             trait_count = u30()
891             for _c2 in range(trait_count):
892                 parse_traits_info()
893
894         assert p + code_reader.tell() == len(code_tag)
895         assert len(methods) == len(method_idxs)
896
897         method_pyfunctions = {}
898
        def extract_function(func_name):
            """Return a Python callable implementing the named SWF method.

            Looks the method body up in *methods*, wraps it in a small
            interpreter covering only the AVM2 opcodes the signature
            decipher code actually uses, and memoizes the result in
            method_pyfunctions — which also lets deciphered methods invoke
            each other (see the callproperty/findpropstrict branches).
            """
            if func_name in method_pyfunctions:
                return method_pyfunctions[func_name]
            if func_name not in methods:
                raise ExtractorError(u'Cannot find function %r' % func_name)
            m = methods[func_name]

            def resfunc(args):
                """Interpret m.code with *args* as the method arguments."""
                # registers[0] is the implicit 'this'; then the call
                # arguments, then the method's local variables.
                registers = ['(this)'] + list(args) + [None] * m.local_count
                stack = []
                coder = io.BytesIO(m.code)
                while True:
                    opcode = struct.unpack('!B', coder.read(1))[0]
                    if opcode == 36:  # pushbyte
                        v = struct.unpack('!B', coder.read(1))[0]
                        stack.append(v)
                    elif opcode == 44:  # pushstring
                        idx = u30(coder)
                        stack.append(constant_strings[idx])
                    elif opcode == 48:  # pushscope
                        # We don't implement the scope register, so we'll just
                        # ignore the popped value
                        stack.pop()
                    elif opcode == 70:  # callproperty
                        index = u30(coder)
                        mname = multinames[index]
                        arg_count = u30(coder)
                        # Arguments were pushed left-to-right, so popping
                        # yields them reversed.
                        args = list(reversed(
                            [stack.pop() for _ in range(arg_count)]))
                        obj = stack.pop()
                        # Only the built-in properties the decipher routines
                        # need (split/slice/join) are emulated here.
                        if mname == u'split':
                            assert len(args) == 1
                            assert isinstance(args[0], compat_str)
                            assert isinstance(obj, compat_str)
                            if args[0] == u'':
                                res = list(obj)
                            else:
                                res = obj.split(args[0])
                            stack.append(res)
                        elif mname == u'slice':
                            assert len(args) == 1
                            assert isinstance(args[0], int)
                            assert isinstance(obj, list)
                            res = obj[args[0]:]
                            stack.append(res)
                        elif mname == u'join':
                            assert len(args) == 1
                            assert isinstance(args[0], compat_str)
                            assert isinstance(obj, list)
                            res = args[0].join(obj)
                            stack.append(res)
                        elif mname in method_pyfunctions:
                            stack.append(method_pyfunctions[mname](args))
                        else:
                            raise NotImplementedError(
                                u'Unsupported property %r on %r'
                                % (mname, obj))
                    elif opcode == 72:  # returnvalue
                        res = stack.pop()
                        return res
                    elif opcode == 79:  # callpropvoid
                        index = u30(coder)
                        mname = multinames[index]
                        arg_count = u30(coder)
                        args = list(reversed(
                            [stack.pop() for _ in range(arg_count)]))
                        obj = stack.pop()
                        if mname == u'reverse':
                            assert isinstance(obj, list)
                            obj.reverse()
                        else:
                            raise NotImplementedError(
                                u'Unsupported (void) property %r on %r'
                                % (mname, obj))
                    elif opcode == 93:  # findpropstrict
                        index = u30(coder)
                        mname = multinames[index]
                        # Recursively compile the referenced method so a
                        # subsequent call opcode can invoke it.
                        res = extract_function(mname)
                        stack.append(res)
                    elif opcode == 97:  # setproperty
                        index = u30(coder)
                        value = stack.pop()
                        idx = stack.pop()
                        obj = stack.pop()
                        assert isinstance(obj, list)
                        assert isinstance(idx, int)
                        obj[idx] = value
                    elif opcode == 98:  # getlocal
                        index = u30(coder)
                        stack.append(registers[index])
                    elif opcode == 99:  # setlocal
                        index = u30(coder)
                        value = stack.pop()
                        registers[index] = value
                    elif opcode == 102:  # getproperty
                        index = u30(coder)
                        pname = multinames[index]
                        if pname == u'length':
                            obj = stack.pop()
                            assert isinstance(obj, list)
                            stack.append(len(obj))
                        else:  # Assume attribute access
                            idx = stack.pop()
                            assert isinstance(idx, int)
                            obj = stack.pop()
                            assert isinstance(obj, list)
                            stack.append(obj[idx])
                    elif opcode == 128:  # coerce
                        u30(coder)
                    elif opcode == 133:  # coerce_s
                        assert isinstance(stack[-1], (type(None), compat_str))
                    elif opcode == 164:  # modulo
                        value2 = stack.pop()
                        value1 = stack.pop()
                        res = value1 % value2
                        stack.append(res)
                    elif opcode == 208:  # getlocal_0
                        stack.append(registers[0])
                    elif opcode == 209:  # getlocal_1
                        stack.append(registers[1])
                    elif opcode == 210:  # getlocal_2
                        stack.append(registers[2])
                    elif opcode == 211:  # getlocal_3
                        stack.append(registers[3])
                    elif opcode == 214:  # setlocal_2
                        registers[2] = stack.pop()
                    elif opcode == 215:  # setlocal_3
                        registers[3] = stack.pop()
                    else:
                        raise NotImplementedError(
                            u'Unsupported opcode %d' % opcode)

            method_pyfunctions[func_name] = resfunc
            return resfunc
1033
1034         initial_function = extract_function(u'decipher')
1035         return lambda s: initial_function([s])
1036
1037     def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
1038         """Turn the encrypted s field into a working signature"""
1039
1040         if player_url is not None:
1041             try:
1042                 player_id = (player_url, len(s))
1043                 if player_id not in self._player_cache:
1044                     func = self._extract_signature_function(
1045                         video_id, player_url, len(s)
1046                     )
1047                     self._player_cache[player_id] = func
1048                 func = self._player_cache[player_id]
1049                 if self._downloader.params.get('youtube_print_sig_code'):
1050                     self._print_sig_code(func, len(s))
1051                 return func(s)
1052             except Exception:
1053                 tb = traceback.format_exc()
1054                 self._downloader.report_warning(
1055                     u'Automatic signature extraction failed: ' + tb)
1056
1057             self._downloader.report_warning(
1058                 u'Warning: Falling back to static signature algorithm')
1059
1060         return self._static_decrypt_signature(
1061             s, video_id, player_url, age_gate)
1062
1063     def _static_decrypt_signature(self, s, video_id, player_url, age_gate):
1064         if age_gate:
1065             # The videos with age protection use another player, so the
1066             # algorithms can be different.
1067             if len(s) == 86:
1068                 return s[2:63] + s[82] + s[64:82] + s[63]
1069
1070         if len(s) == 93:
1071             return s[86:29:-1] + s[88] + s[28:5:-1]
1072         elif len(s) == 92:
1073             return s[25] + s[3:25] + s[0] + s[26:42] + s[79] + s[43:79] + s[91] + s[80:83]
1074         elif len(s) == 91:
1075             return s[84:27:-1] + s[86] + s[26:5:-1]
1076         elif len(s) == 90:
1077             return s[25] + s[3:25] + s[2] + s[26:40] + s[77] + s[41:77] + s[89] + s[78:81]
1078         elif len(s) == 89:
1079             return s[84:78:-1] + s[87] + s[77:60:-1] + s[0] + s[59:3:-1]
1080         elif len(s) == 88:
1081             return s[7:28] + s[87] + s[29:45] + s[55] + s[46:55] + s[2] + s[56:87] + s[28]
1082         elif len(s) == 87:
1083             return s[6:27] + s[4] + s[28:39] + s[27] + s[40:59] + s[2] + s[60:]
1084         elif len(s) == 86:
1085             return s[80:72:-1] + s[16] + s[71:39:-1] + s[72] + s[38:16:-1] + s[82] + s[15::-1]
1086         elif len(s) == 85:
1087             return s[3:11] + s[0] + s[12:55] + s[84] + s[56:84]
1088         elif len(s) == 84:
1089             return s[78:70:-1] + s[14] + s[69:37:-1] + s[70] + s[36:14:-1] + s[80] + s[:14][::-1]
1090         elif len(s) == 83:
1091             return s[80:63:-1] + s[0] + s[62:0:-1] + s[63]
1092         elif len(s) == 82:
1093             return s[80:37:-1] + s[7] + s[36:7:-1] + s[0] + s[6:0:-1] + s[37]
1094         elif len(s) == 81:
1095             return s[56] + s[79:56:-1] + s[41] + s[55:41:-1] + s[80] + s[40:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
1096         elif len(s) == 80:
1097             return s[1:19] + s[0] + s[20:68] + s[19] + s[69:80]
1098         elif len(s) == 79:
1099             return s[54] + s[77:54:-1] + s[39] + s[53:39:-1] + s[78] + s[38:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
1100
1101         else:
1102             raise ExtractorError(u'Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s)))
1103
1104     def _get_available_subtitles(self, video_id):
1105         try:
1106             sub_list = self._download_webpage(
1107                 'http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
1108                 video_id, note=False)
1109         except ExtractorError as err:
1110             self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err))
1111             return {}
1112         lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list)
1113
1114         sub_lang_list = {}
1115         for l in lang_list:
1116             lang = l[1]
1117             params = compat_urllib_parse.urlencode({
1118                 'lang': lang,
1119                 'v': video_id,
1120                 'fmt': self._downloader.params.get('subtitlesformat'),
1121                 'name': l[0],
1122             })
1123             url = u'http://www.youtube.com/api/timedtext?' + params
1124             sub_lang_list[lang] = url
1125         if not sub_lang_list:
1126             self._downloader.report_warning(u'video doesn\'t have subtitles')
1127             return {}
1128         return sub_lang_list
1129
    def _get_available_automatic_caption(self, video_id, webpage):
        """Return a dict mapping target language codes to auto-caption URLs.

        We need the webpage for getting the captions url, pass it as an
        argument to speed up the process.
        Returns {} (after a warning) when the player config cannot be found
        or the video has no automatic (ASR) captions.
        """
        sub_format = self._downloader.params.get('subtitlesformat')
        self.to_screen(u'%s: Looking for automatic captions' % video_id)
        # The caption base URL and timestamp live in the inline player config JSON.
        mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
        err_msg = u'Couldn\'t find automatic captions for %s' % video_id
        if mobj is None:
            self._downloader.report_warning(err_msg)
            return {}
        player_config = json.loads(mobj.group(1))
        try:
            args = player_config[u'args']
            caption_url = args[u'ttsurl']
            timestamp = args[u'timestamp']
            # We get the available subtitles
            list_params = compat_urllib_parse.urlencode({
                'type': 'list',
                'tlangs': 1,
                'asrs': 1,
            })
            list_url = caption_url + '&' + list_params
            list_page = self._download_webpage(list_url, video_id)
            caption_list = xml.etree.ElementTree.fromstring(list_page.encode('utf-8'))
            # The first <track> must be the auto-generated (kind="asr") one;
            # otherwise the video has no automatic captions at all.
            original_lang_node = caption_list.find('track')
            if original_lang_node is None or original_lang_node.attrib.get('kind') != 'asr' :
                self._downloader.report_warning(u'Video doesn\'t have automatic captions')
                return {}
            original_lang = original_lang_node.attrib['lang_code']

            # Build one URL per translation target, always translating from
            # the ASR original language.
            sub_lang_list = {}
            for lang_node in caption_list.findall('target'):
                sub_lang = lang_node.attrib['lang_code']
                params = compat_urllib_parse.urlencode({
                    'lang': original_lang,
                    'tlang': sub_lang,
                    'fmt': sub_format,
                    'ts': timestamp,
                    'kind': 'asr',
                })
                sub_lang_list[sub_lang] = caption_url + '&' + params
            return sub_lang_list
        # An extractor error can be raised by the download process if there are
        # no automatic captions but there are subtitles
        except (KeyError, ExtractorError):
            self._downloader.report_warning(err_msg)
            return {}
1177
1178     def _print_formats(self, formats):
1179         print('Available formats:')
1180         for x in formats:
1181             print('%s\t:\t%s\t[%s]%s' %(x, self._video_extensions.get(x, 'flv'),
1182                                         self._video_dimensions.get(x, '???'),
1183                                         ' ('+self._special_itags[x]+')' if x in self._special_itags else ''))
1184
1185     def _extract_id(self, url):
1186         mobj = re.match(self._VALID_URL, url, re.VERBOSE)
1187         if mobj is None:
1188             raise ExtractorError(u'Invalid URL: %s' % url)
1189         video_id = mobj.group(2)
1190         return video_id
1191
    def _get_video_url_list(self, url_map):
        """
        Transform a dictionary in the format {itag:url} to a list of (itag, url)
        with the requested formats.

        Returns None (after printing the table) when the user only asked to
        list the available formats; raises ExtractorError when no requested
        format is available.
        """
        req_format = self._downloader.params.get('format', None)
        format_limit = self._downloader.params.get('format_limit', None)
        # Itag preference list, best quality first; which list is used
        # depends on the prefer_free_formats option.
        available_formats = self._available_formats_prefer_free if self._downloader.params.get('prefer_free_formats', False) else self._available_formats
        if format_limit is not None and format_limit in available_formats:
            # Drop everything ranked above the requested quality cap.
            format_list = available_formats[available_formats.index(format_limit):]
        else:
            format_list = available_formats
        # Keep only the itags actually offered for this video, in preference order.
        existing_formats = [x for x in format_list if x in url_map]
        if len(existing_formats) == 0:
            raise ExtractorError(u'no known formats available for video')
        if self._downloader.params.get('listformats', None):
            self._print_formats(existing_formats)
            # Listing only: caller receives None and must stop extraction.
            return
        if req_format is None or req_format == 'best':
            video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
        elif req_format == 'worst':
            video_url_list = [(existing_formats[-1], url_map[existing_formats[-1]])] # worst quality
        elif req_format in ('-1', 'all'):
            video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
        else:
            # Specific formats. We pick the first in a slash-delimited sequence.
            # Format can be specified as itag or 'mp4' or 'flv' etc. We pick the highest quality
            # available in the specified format. For example,
            # if '1/2/3/4' is requested and '2' and '4' are available, we pick '2'.
            # if '1/mp4/3/4' is requested and '1' and '5' (is a mp4) are available, we pick '1'.
            # if '1/mp4/3/4' is requested and '4' and '5' (is a mp4) are available, we pick '5'.
            req_formats = req_format.split('/')
            video_url_list = None
            for rf in req_formats:
                if rf in url_map:
                    video_url_list = [(rf, url_map[rf])]
                    break
                if rf in self._video_formats_map:
                    # rf names a container/extension: try each of its itags.
                    # The inner for/else 'continue' moves on to the next
                    # requested format when none of them is available;
                    # otherwise the trailing 'break' stops the outer loop.
                    for srf in self._video_formats_map[rf]:
                        if srf in url_map:
                            video_url_list = [(srf, url_map[srf])]
                            break
                    else:
                        continue
                    break
            if video_url_list is None:
                raise ExtractorError(u'requested format not available')
        return video_url_list
1240
1241     def _extract_from_m3u8(self, manifest_url, video_id):
1242         url_map = {}
1243         def _get_urls(_manifest):
1244             lines = _manifest.split('\n')
1245             urls = filter(lambda l: l and not l.startswith('#'),
1246                             lines)
1247             return urls
1248         manifest = self._download_webpage(manifest_url, video_id, u'Downloading formats manifest')
1249         formats_urls = _get_urls(manifest)
1250         for format_url in formats_urls:
1251             itag = self._search_regex(r'itag/(\d+?)/', format_url, 'itag')
1252             url_map[itag] = format_url
1253         return url_map
1254
1255     def _extract_annotations(self, video_id):
1256         url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id
1257         return self._download_webpage(url, video_id, note=u'Searching for annotations.', errnote=u'Unable to download video annotations.')
1258
1259     def _real_extract(self, url):
1260         # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1261         mobj = re.search(self._NEXT_URL_RE, url)
1262         if mobj:
1263             url = 'https://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/')
1264         video_id = self._extract_id(url)
1265
1266         # Get video webpage
1267         self.report_video_webpage_download(video_id)
1268         url = 'https://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
1269         request = compat_urllib_request.Request(url)
1270         try:
1271             video_webpage_bytes = compat_urllib_request.urlopen(request).read()
1272         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1273             raise ExtractorError(u'Unable to download video webpage: %s' % compat_str(err))
1274
1275         video_webpage = video_webpage_bytes.decode('utf-8', 'ignore')
1276
1277         # Attempt to extract SWF player URL
1278         mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
1279         if mobj is not None:
1280             player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1281         else:
1282             player_url = None
1283
1284         # Get video info
1285         self.report_video_info_webpage_download(video_id)
1286         if re.search(r'player-age-gate-content">', video_webpage) is not None:
1287             self.report_age_confirmation()
1288             age_gate = True
1289             # We simulate the access to the video from www.youtube.com/v/{video_id}
1290             # this can be viewed without login into Youtube
1291             data = compat_urllib_parse.urlencode({'video_id': video_id,
1292                                                   'el': 'embedded',
1293                                                   'gl': 'US',
1294                                                   'hl': 'en',
1295                                                   'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1296                                                   'asv': 3,
1297                                                   'sts':'1588',
1298                                                   })
1299             video_info_url = 'https://www.youtube.com/get_video_info?' + data
1300             video_info_webpage = self._download_webpage(video_info_url, video_id,
1301                                     note=False,
1302                                     errnote='unable to download video info webpage')
1303             video_info = compat_parse_qs(video_info_webpage)
1304         else:
1305             age_gate = False
1306             for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
1307                 video_info_url = ('https://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
1308                         % (video_id, el_type))
1309                 video_info_webpage = self._download_webpage(video_info_url, video_id,
1310                                         note=False,
1311                                         errnote='unable to download video info webpage')
1312                 video_info = compat_parse_qs(video_info_webpage)
1313                 if 'token' in video_info:
1314                     break
1315         if 'token' not in video_info:
1316             if 'reason' in video_info:
1317                 raise ExtractorError(u'YouTube said: %s' % video_info['reason'][0], expected=True)
1318             else:
1319                 raise ExtractorError(u'"token" parameter not in video info for unknown reason')
1320
1321         # Check for "rental" videos
1322         if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
1323             raise ExtractorError(u'"rental" videos not supported')
1324
1325         # Start extracting information
1326         self.report_information_extraction(video_id)
1327
1328         # uploader
1329         if 'author' not in video_info:
1330             raise ExtractorError(u'Unable to extract uploader name')
1331         video_uploader = compat_urllib_parse.unquote_plus(video_info['author'][0])
1332
1333         # uploader_id
1334         video_uploader_id = None
1335         mobj = re.search(r'<link itemprop="url" href="http://www.youtube.com/(?:user|channel)/([^"]+)">', video_webpage)
1336         if mobj is not None:
1337             video_uploader_id = mobj.group(1)
1338         else:
1339             self._downloader.report_warning(u'unable to extract uploader nickname')
1340
1341         # title
1342         if 'title' in video_info:
1343             video_title = compat_urllib_parse.unquote_plus(video_info['title'][0])
1344         else:
1345             self._downloader.report_warning(u'Unable to extract video title')
1346             video_title = u'_'
1347
1348         # thumbnail image
1349         # We try first to get a high quality image:
1350         m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
1351                             video_webpage, re.DOTALL)
1352         if m_thumb is not None:
1353             video_thumbnail = m_thumb.group(1)
1354         elif 'thumbnail_url' not in video_info:
1355             self._downloader.report_warning(u'unable to extract video thumbnail')
1356             video_thumbnail = None
1357         else:   # don't panic if we can't find it
1358             video_thumbnail = compat_urllib_parse.unquote_plus(video_info['thumbnail_url'][0])
1359
1360         # upload date
1361         upload_date = None
1362         mobj = re.search(r'id="eow-date.*?>(.*?)</span>', video_webpage, re.DOTALL)
1363         if mobj is not None:
1364             upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
1365             upload_date = unified_strdate(upload_date)
1366
1367         # description
1368         video_description = get_element_by_id("eow-description", video_webpage)
1369         if video_description:
1370             video_description = clean_html(video_description)
1371         else:
1372             fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
1373             if fd_mobj:
1374                 video_description = unescapeHTML(fd_mobj.group(1))
1375             else:
1376                 video_description = u''
1377
1378         # subtitles
1379         video_subtitles = self.extract_subtitles(video_id, video_webpage)
1380
1381         if self._downloader.params.get('listsubtitles', False):
1382             self._list_available_subtitles(video_id, video_webpage)
1383             return
1384
1385         if 'length_seconds' not in video_info:
1386             self._downloader.report_warning(u'unable to extract video duration')
1387             video_duration = ''
1388         else:
1389             video_duration = compat_urllib_parse.unquote_plus(video_info['length_seconds'][0])
1390
1391         # annotations
1392         video_annotations = None
1393         if self._downloader.params.get('writeannotations', False):
1394                 video_annotations = self._extract_annotations(video_id)
1395
1396         # Decide which formats to download
1397
1398         try:
1399             mobj = re.search(r';ytplayer.config = ({.*?});', video_webpage)
1400             if not mobj:
1401                 raise ValueError('Could not find vevo ID')
1402             info = json.loads(mobj.group(1))
1403             args = info['args']
1404             # Easy way to know if the 's' value is in url_encoded_fmt_stream_map
1405             # this signatures are encrypted
1406             if 'url_encoded_fmt_stream_map' not in args:
1407                 raise ValueError(u'No stream_map present')  # caught below
1408             m_s = re.search(r'[&,]s=', args['url_encoded_fmt_stream_map'])
1409             if m_s is not None:
1410                 self.to_screen(u'%s: Encrypted signatures detected.' % video_id)
1411                 video_info['url_encoded_fmt_stream_map'] = [args['url_encoded_fmt_stream_map']]
1412             m_s = re.search(r'[&,]s=', args.get('adaptive_fmts', u''))
1413             if m_s is not None:
1414                 if 'url_encoded_fmt_stream_map' in video_info:
1415                     video_info['url_encoded_fmt_stream_map'][0] += ',' + args['adaptive_fmts']
1416                 else:
1417                     video_info['url_encoded_fmt_stream_map'] = [args['adaptive_fmts']]
1418             elif 'adaptive_fmts' in video_info:
1419                 if 'url_encoded_fmt_stream_map' in video_info:
1420                     video_info['url_encoded_fmt_stream_map'][0] += ',' + video_info['adaptive_fmts'][0]
1421                 else:
1422                     video_info['url_encoded_fmt_stream_map'] = video_info['adaptive_fmts']
1423         except ValueError:
1424             pass
1425
1426         if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1427             self.report_rtmp_download()
1428             video_url_list = [(None, video_info['conn'][0])]
1429         elif 'url_encoded_fmt_stream_map' in video_info and len(video_info['url_encoded_fmt_stream_map']) >= 1:
1430             if 'rtmpe%3Dyes' in video_info['url_encoded_fmt_stream_map'][0]:
1431                 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
1432             url_map = {}
1433             for url_data_str in video_info['url_encoded_fmt_stream_map'][0].split(','):
1434                 url_data = compat_parse_qs(url_data_str)
1435                 if 'itag' in url_data and 'url' in url_data:
1436                     url = url_data['url'][0]
1437                     if 'sig' in url_data:
1438                         url += '&signature=' + url_data['sig'][0]
1439                     elif 's' in url_data:
1440                         encrypted_sig = url_data['s'][0]
1441                         if self._downloader.params.get('verbose'):
1442                             if age_gate:
1443                                 if player_url is None:
1444                                     player_version = 'unknown'
1445                                 else:
1446                                     player_version = self._search_regex(
1447                                         r'-(.+)\.swf$', player_url,
1448                                         u'flash player', fatal=False)
1449                                 player_desc = 'flash player %s' % player_version
1450                             else:
1451                                 player_version = self._search_regex(
1452                                     r'html5player-(.+?)\.js', video_webpage,
1453                                     'html5 player', fatal=False)
1454                                 player_desc = u'html5 player %s' % player_version
1455
1456                             parts_sizes = u'.'.join(compat_str(len(part)) for part in encrypted_sig.split('.'))
1457                             self.to_screen(u'encrypted signature length %d (%s), itag %s, %s' %
1458                                 (len(encrypted_sig), parts_sizes, url_data['itag'][0], player_desc))
1459
1460                         if not age_gate:
1461                             jsplayer_url_json = self._search_regex(
1462                                 r'"assets":.+?"js":\s*("[^"]+")',
1463                                 video_webpage, u'JS player URL')
1464                             player_url = json.loads(jsplayer_url_json)
1465
1466                         signature = self._decrypt_signature(
1467                             encrypted_sig, video_id, player_url, age_gate)
1468                         url += '&signature=' + signature
1469                     if 'ratebypass' not in url:
1470                         url += '&ratebypass=yes'
1471                     url_map[url_data['itag'][0]] = url
1472             video_url_list = self._get_video_url_list(url_map)
1473             if not video_url_list:
1474                 return
1475         elif video_info.get('hlsvp'):
1476             manifest_url = video_info['hlsvp'][0]
1477             url_map = self._extract_from_m3u8(manifest_url, video_id)
1478             video_url_list = self._get_video_url_list(url_map)
1479             if not video_url_list:
1480                 return
1481
1482         else:
1483             raise ExtractorError(u'no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
1484
1485         results = []
1486         for format_param, video_real_url in video_url_list:
1487             # Extension
1488             video_extension = self._video_extensions.get(format_param, 'flv')
1489
1490             video_format = '{0} - {1}{2}'.format(format_param if format_param else video_extension,
1491                                               self._video_dimensions.get(format_param, '???'),
1492                                               ' ('+self._special_itags[format_param]+')' if format_param in self._special_itags else '')
1493
1494             results.append({
1495                 'id':       video_id,
1496                 'url':      video_real_url,
1497                 'uploader': video_uploader,
1498                 'uploader_id': video_uploader_id,
1499                 'upload_date':  upload_date,
1500                 'title':    video_title,
1501                 'ext':      video_extension,
1502                 'format':   video_format,
1503                 'thumbnail':    video_thumbnail,
1504                 'description':  video_description,
1505                 'player_url':   player_url,
1506                 'subtitles':    video_subtitles,
1507                 'duration':     video_duration,
1508                 'age_limit':    18 if age_gate else 0,
1509                 'annotations':  video_annotations
1510             })
1511         return results
1512
class YoutubePlaylistIE(InfoExtractor):
    IE_DESC = u'YouTube.com playlists'
    _VALID_URL = r"""(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        youtube\.com/
                        (?:
                           (?:course|view_play_list|my_playlists|artist|playlist|watch)
                           \? (?:.*?&)*? (?:p|a|list)=
                        |  p/
                        )
                        ((?:PL|EC|UU|FL)?[0-9A-Za-z-_]{10,})
                        .*
                     |
                        ((?:PL|EC|UU|FL)[0-9A-Za-z-_]{10,})
                     )"""
    _TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/playlists/%s?max-results=%i&start-index=%i&v=2&alt=json&safeSearch=none'
    _MAX_RESULTS = 50
    IE_NAME = u'youtube:playlist'

    @classmethod
    def suitable(cls, url):
        """Receives a URL and returns True if suitable for this IE."""
        # _VALID_URL is written in verbose mode, so the default matching in
        # InfoExtractor.suitable cannot be used here.
        return bool(re.match(cls._VALID_URL, url, re.VERBOSE))

    def _real_extract(self, url):
        """Resolve a playlist URL into a list of video results via the GData API."""
        mobj = re.match(self._VALID_URL, url, re.VERBOSE)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        # The playlist id may come from either alternative of the pattern.
        playlist_id = mobj.group(1) or mobj.group(2)

        # A watch URL can name both a playlist and a single video;
        # honour --no-playlist in that case.
        query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
        if 'v' in query_dict:
            video_id = query_dict['v'][0]
            if self._downloader.params.get('noplaylist'):
                self.to_screen(u'Downloading just video %s because of --no-playlist' % video_id)
                return self.url_result('https://www.youtube.com/watch?v=' + video_id, 'Youtube')
            else:
                self.to_screen(u'Downloading playlist PL%s - add --no-playlist to just download video %s' % (playlist_id, video_id))

        # Page through the GData API, collecting (position, watch-url) pairs.
        videos = []
        page_num = 0
        while True:
            page_num += 1
            start_index = self._MAX_RESULTS * (page_num - 1) + 1
            if start_index >= 1000:
                # The API refuses start indices this large.
                self._downloader.report_warning(u'Max number of results reached')
                break
            url = self._TEMPLATE_URL % (playlist_id, self._MAX_RESULTS, start_index)
            page = self._download_webpage(url, playlist_id, u'Downloading page #%s' % page_num)

            try:
                response = json.loads(page)
            except ValueError as err:
                raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))

            if 'feed' not in response:
                raise ExtractorError(u'Got a malformed response from YouTube API')
            playlist_title = response['feed']['title']['$t']
            if 'entry' not in response['feed']:
                # Number of videos is a multiple of self._MAX_RESULTS
                break

            for entry in response['feed']['entry']:
                index = entry['yt$position']['$t']
                if 'media$group' in entry and 'yt$videoid' in entry['media$group']:
                    videos.append((
                        index,
                        'https://www.youtube.com/watch?v=' + entry['media$group']['yt$videoid']['$t']
                    ))

        # Restore playlist order, then drop the position component.
        videos.sort()
        video_urls = [pair[1] for pair in videos]

        url_results = [self.url_result(vurl, 'Youtube') for vurl in video_urls]
        return [self.playlist_result(url_results, playlist_id, playlist_title)]
1590
1591
class YoutubeChannelIE(InfoExtractor):
    IE_DESC = u'YouTube.com channels'
    _VALID_URL = r"^(?:https?://)?(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/([0-9A-Za-z_-]+)"
    _TEMPLATE_URL = 'http://www.youtube.com/channel/%s/videos?sort=da&flow=list&view=0&page=%s&gl=US&hl=en'
    _MORE_PAGES_INDICATOR = 'yt-uix-load-more'
    _MORE_PAGES_URL = 'http://www.youtube.com/c4_browse_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s'
    IE_NAME = u'youtube:channel'

    def extract_videos_from_page(self, page):
        """Return the video ids linked from a channel page, de-duplicated
        while preserving first-seen order."""
        ids_in_page = []
        seen = set()
        for mobj in re.finditer(r'href="/watch\?v=([0-9A-Za-z_-]+)&?', page):
            vid = mobj.group(1)
            if vid not in seen:
                seen.add(vid)
                ids_in_page.append(vid)
        return ids_in_page

    def _real_extract(self, url):
        """Collect every video of a channel and return it as one playlist."""
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        channel_id = mobj.group(1)
        video_ids = []
        pagenum = 1

        # The first page is plain HTML.
        url = self._TEMPLATE_URL % (channel_id, pagenum)
        page = self._download_webpage(url, channel_id,
                                      u'Downloading page #%s' % pagenum)
        video_ids.extend(self.extract_videos_from_page(page))

        # Subsequent pages come back as JSON from the channel_ajax endpoint.
        if self._MORE_PAGES_INDICATOR in page:
            pagenum = 1
            while True:
                url = self._MORE_PAGES_URL % (pagenum, channel_id)
                page = self._download_webpage(url, channel_id,
                                              u'Downloading page #%s' % pagenum)

                page = json.loads(page)

                video_ids.extend(self.extract_videos_from_page(page['content_html']))

                if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']:
                    break
                pagenum += 1

        self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))

        urls = ['http://www.youtube.com/watch?v=%s' % vid for vid in video_ids]
        url_entries = [self.url_result(eurl, 'Youtube') for eurl in urls]
        return [self.playlist_result(url_entries, channel_id)]
1646
1647
class YoutubeUserIE(InfoExtractor):
    IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)([A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
    _GDATA_PAGE_SIZE = 50
    _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'
    IE_NAME = u'youtube:user'

    @classmethod
    def suitable(cls, url):
        # Our pattern is very permissive, so defer to any other Youtube
        # extractor that also matches this URL.
        for name, klass in globals().items():
            if name.endswith('IE') and klass is not cls and klass.suitable(url):
                return False
        return super(YoutubeUserIE, cls).suitable(url)

    def _real_extract(self, url):
        """Return all uploads of a user as one playlist result."""
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        username = mobj.group(1)

        # The Data API caps each response at _GDATA_PAGE_SIZE entries, so
        # keep paging until a short (or empty) page signals the end.
        video_ids = []
        start_index = 1

        while True:
            gdata_url = self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index)
            page = self._download_webpage(gdata_url, username,
                                          u'Downloading video ids from %d to %d' % (start_index, start_index + self._GDATA_PAGE_SIZE))

            try:
                response = json.loads(page)
            except ValueError as err:
                raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))
            if 'entry' not in response['feed']:
                # Number of videos is a multiple of self._MAX_RESULTS
                break

            # The video id is the last path component of each entry id.
            ids_in_page = [entry['id']['$t'].split('/')[-1]
                           for entry in response['feed']['entry']]
            video_ids.extend(ids_in_page)

            # A page that is not "full" must be the last one; no need to
            # query again.
            if len(ids_in_page) < self._GDATA_PAGE_SIZE:
                break
            start_index += self._GDATA_PAGE_SIZE

        urls = ['http://www.youtube.com/watch?v=%s' % video_id for video_id in video_ids]
        url_results = [self.url_result(rurl, 'Youtube') for rurl in urls]
        return [self.playlist_result(url_results, playlist_title=username)]
1712
class YoutubeSearchIE(SearchInfoExtractor):
    IE_DESC = u'YouTube.com searches'
    _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc'
    _MAX_RESULTS = 1000
    IE_NAME = u'youtube:search'
    _SEARCH_KEY = 'ytsearch'

    def report_download_page(self, query, pagenum):
        """Report attempt to download search page with given number."""
        self._downloader.to_screen(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query.

        Pages through the GData search API (50 results per page) until
        either n ids have been collected or the API reports that fewer
        than n results exist in total.
        """
        video_ids = []
        pagenum = 0
        limit = n

        while (50 * pagenum) < limit:
            self.report_download_page(query, pagenum + 1)
            result_url = self._API_URL % (compat_urllib_parse.quote_plus(query), (50 * pagenum) + 1)
            request = compat_urllib_request.Request(result_url)
            try:
                data = compat_urllib_request.urlopen(request).read().decode('utf-8')
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                raise ExtractorError(u'Unable to download API page: %s' % compat_str(err))
            api_response = json.loads(data)['data']

            if 'items' not in api_response:
                raise ExtractorError(u'[youtube] No video results')

            video_ids += [video['id'] for video in api_response['items']]

            # The API may report fewer total results than we asked for;
            # tighten the limit accordingly.
            limit = min(n, api_response['totalItems'])
            pagenum += 1

        if len(video_ids) > n:
            video_ids = video_ids[:n]
        videos = [self.url_result('http://www.youtube.com/watch?v=%s' % video_id, 'Youtube')
                  for video_id in video_ids]
        return self.playlist_result(videos, query)
1754
1755
class YoutubeShowIE(InfoExtractor):
    IE_DESC = u'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://www\.youtube\.com/show/(.*)'
    IE_NAME = u'youtube:show'

    def _real_extract(self, url):
        """Return one playlist result per season of the show."""
        show_name = re.match(self._VALID_URL, url).group(1)
        webpage = self._download_webpage(url, show_name, u'Downloading show webpage')
        # Each season of the show is exposed as its own playlist.
        m_seasons = list(re.finditer(r'href="(/playlist\?list=.*?)"', webpage))
        self.to_screen(u'%s: Found %s seasons' % (show_name, len(m_seasons)))
        results = []
        for season in m_seasons:
            results.append(self.url_result('https://www.youtube.com' + season.group(1), 'YoutubePlaylist'))
        return results
1769
1770
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Base class for extractors that fetch info from
    http://www.youtube.com/feed_ajax
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
    """
    _LOGIN_REQUIRED = True
    _PAGING_STEP = 30
    # use action_load_personal_feed instead of action_load_system_feed
    _PERSONAL_FEED = False

    @property
    def _FEED_TEMPLATE(self):
        # Personal feeds (e.g. Watch Later) use a different ajax action.
        if self._PERSONAL_FEED:
            action = 'action_load_personal_feed'
        else:
            action = 'action_load_system_feed'
        return 'http://www.youtube.com/feed_ajax?%s=1&feed_name=%s&paging=%%s' % (action, self._FEED_NAME)

    @property
    def IE_NAME(self):
        return u'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        self._login()

    def _real_extract(self, url):
        """Walk the paginated ajax feed and return every linked video."""
        feed_entries = []
        page_idx = 0
        # The step argument is available only in 2.7 or higher
        while True:
            paging = page_idx * self._PAGING_STEP
            info = json.loads(self._download_webpage(
                self._FEED_TEMPLATE % paging,
                u'%s feed' % self._FEED_NAME,
                u'Downloading page %s' % page_idx))
            feed_html = info['feed_html']
            m_ids = re.finditer(r'"/watch\?v=(.*?)["&]', feed_html)
            for video_id in orderedSet(m.group(1) for m in m_ids):
                feed_entries.append(self.url_result(video_id, 'Youtube'))
            # A null 'paging' value marks the last page.
            if info['paging'] is None:
                break
            page_idx += 1
        return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE)
1812
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Extractor for the authenticated user's subscriptions feed."""
    IE_DESC = u'YouTube.com subscriptions feed, "ytsubs" keyword(requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
    # Feed name passed to the feed_ajax endpoint by the base class.
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = u'Youtube Subscriptions'
1818
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Extractor for the authenticated user's recommended-videos feed."""
    IE_DESC = u'YouTube.com recommended videos, "ytrec" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?'
    # Feed name passed to the feed_ajax endpoint by the base class.
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = u'Youtube Recommended videos'
1824
class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor):
    """Extractor for the authenticated user's "Watch Later" list."""
    IE_DESC = u'Youtube watch later list, "ytwatchlater" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/watch_later|:ytwatchlater'
    # Feed name passed to the feed_ajax endpoint by the base class.
    _FEED_NAME = 'watch_later'
    _PLAYLIST_TITLE = u'Youtube Watch Later'
    # Request larger pages than the base class default of 30.
    _PAGING_STEP = 100
    # Watch Later is per-user, so use the personal feed ajax action.
    _PERSONAL_FEED = True
1832
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    IE_NAME = u'youtube:favorites'
    IE_DESC = u'YouTube.com favourite videos, "ytfav" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
    _LOGIN_REQUIRED = True

    def _real_extract(self, url):
        """Resolve the favourites page to its backing playlist."""
        page = self._download_webpage('https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        # The favourites page embeds the id of the real playlist; delegate
        # the actual extraction to the playlist extractor.
        playlist_id = self._search_regex(r'list=(.+?)["&]', page, u'favourites playlist id')
        return self.url_result(playlist_id, 'YoutubePlaylist')
1843
1844
class YoutubeTruncatedURLIE(InfoExtractor):
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'(?:https?://)?[^/]+/watch\?feature=[a-z_]+$'

    def _real_extract(self, url):
        """Never extracts: a bare ?feature= URL means the shell swallowed
        the &v= part, so tell the user to quote the URL."""
        message = (
            u'Did you forget to quote the URL? Remember that & is a meta '
            u'character in most shells, so you want to put the URL in quotes, '
            u'like  youtube-dl '
            u'\'http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc\''
            u' (or simply  youtube-dl BaW_jenozKc  ).')
        raise ExtractorError(message, expected=True)