git.bitcoin.ninja Git - youtube-dl/blob - youtube.py

   1 # coding: utf-8
   2
   3 import collections
   4 import errno
   5 import io
   6 import itertools
   7 import json
   8 import os.path
   9 import re
  10 import socket
  11 import string
  12 import struct
  13 import traceback
  14 import xml.etree.ElementTree
  15 import zlib
  16
  17 from .common import InfoExtractor, SearchInfoExtractor
  18 from .subtitles import SubtitlesInfoExtractor
  19 from ..utils import (
  20     compat_chr,
  21     compat_http_client,
  22     compat_parse_qs,
  23     compat_urllib_error,
  24     compat_urllib_parse,
  25     compat_urllib_request,
  26     compat_urlparse,
  27     compat_str,
  28
  29     clean_html,
  30     get_cachedir,
  31     get_element_by_id,
  32     ExtractorError,
  33     unescapeHTML,
  34     unified_strdate,
  35     orderedSet,
  36     write_json_file,
  37 )
  38
  39 class YoutubeBaseInfoExtractor(InfoExtractor):
  40     """Provide base functions for Youtube extractors"""
  41     _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
  42     _LANG_URL = r'https://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
  43     _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
  44     _NETRC_MACHINE = 'youtube'
  45     # If True it will raise an error if no login info is provided
  46     _LOGIN_REQUIRED = False
  47
  48     def report_lang(self):
  49         """Report attempt to set language."""
  50         self.to_screen(u'Setting language')
  51
  52     def _set_language(self):
  53         request = compat_urllib_request.Request(self._LANG_URL)
  54         try:
  55             self.report_lang()
  56             compat_urllib_request.urlopen(request).read()
  57         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
  58             self._downloader.report_warning(u'unable to set language: %s' % compat_str(err))
  59             return False
  60         return True
  61
  62     def _login(self):
  63         (username, password) = self._get_login_info()
  64         # No authentication to be performed
  65         if username is None:
  66             if self._LOGIN_REQUIRED:
  67                 raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True)
  68             return False
  69
  70         request = compat_urllib_request.Request(self._LOGIN_URL)
  71         try:
  72             login_page = compat_urllib_request.urlopen(request).read().decode('utf-8')
  73         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
  74             self._downloader.report_warning(u'unable to fetch login page: %s' % compat_str(err))
  75             return False
  76
  77         galx = None
  78         dsh = None
  79         match = re.search(re.compile(r'<input.+?name="GALX".+?value="(.+?)"', re.DOTALL), login_page)
  80         if match:
  81           galx = match.group(1)
  82         match = re.search(re.compile(r'<input.+?name="dsh".+?value="(.+?)"', re.DOTALL), login_page)
  83         if match:
  84           dsh = match.group(1)
  85
  86         # Log in
  87         login_form_strs = {
  88                 u'continue': u'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
  89                 u'Email': username,
  90                 u'GALX': galx,
  91                 u'Passwd': password,
  92                 u'PersistentCookie': u'yes',
  93                 u'_utf8': u'霱',
  94                 u'bgresponse': u'js_disabled',
  95                 u'checkConnection': u'',
  96                 u'checkedDomains': u'youtube',
  97                 u'dnConn': u'',
  98                 u'dsh': dsh,
  99                 u'pstMsg': u'0',
 100                 u'rmShown': u'1',
 101                 u'secTok': u'',
 102                 u'signIn': u'Sign in',
 103                 u'timeStmp': u'',
 104                 u'service': u'youtube',
 105                 u'uilel': u'3',
 106                 u'hl': u'en_US',
 107         }
 108         # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
 109         # chokes on unicode
 110         login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k,v in login_form_strs.items())
 111         login_data = compat_urllib_parse.urlencode(login_form).encode('ascii')
 112         request = compat_urllib_request.Request(self._LOGIN_URL, login_data)
 113         try:
 114             self.report_login()
 115             login_results = compat_urllib_request.urlopen(request).read().decode('utf-8')
 116             if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
 117                 self._downloader.report_warning(u'unable to log in: bad username or password')
 118                 return False
 119         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
 120             self._downloader.report_warning(u'unable to log in: %s' % compat_str(err))
 121             return False
 122         return True
 123
 124     def _confirm_age(self):
 125         age_form = {
 126                 'next_url':     '/',
 127                 'action_confirm':   'Confirm',
 128                 }
 129         request = compat_urllib_request.Request(self._AGE_URL, compat_urllib_parse.urlencode(age_form))
 130         try:
 131             self.report_age_confirmation()
 132             compat_urllib_request.urlopen(request).read().decode('utf-8')
 133         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
 134             raise ExtractorError(u'Unable to confirm age: %s' % compat_str(err))
 135         return True
 136
 137     def _real_initialize(self):
 138         if self._downloader is None:
 139             return
 140         if not self._set_language():
 141             return
 142         if not self._login():
 143             return
 144         self._confirm_age()
 145
 146
 147 class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
 148     IE_DESC = u'YouTube.com'
 149     _VALID_URL = r"""^
 150                      (
 151                          (?:https?://)?                                       # http(s):// (optional)
 152                          (?:(?:(?:(?:\w+\.)?youtube(?:-nocookie)?\.com/|
 153                             tube\.majestyc\.net/|
 154                             youtube\.googleapis\.com/)                        # the various hostnames, with wildcard subdomains
 155                          (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
 156                          (?:                                                  # the various things that can precede the ID:
 157                              (?:(?:v|embed|e)/)                               # v/ or embed/ or e/
 158                              |(?:                                             # or the v= param in all its forms
 159                                  (?:(?:watch|movie)(?:_popup)?(?:\.php)?)?    # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
 160                                  (?:\?|\#!?)                                  # the params delimiter ? or # or #!
 161                                  (?:.*?&)?                                    # any other preceding param (like /?s=tuff&v=xxxx)
 162                                  v=
 163                              )
 164                          ))
 165                          |youtu\.be/                                          # just youtu.be/xxxx
 166                          )
 167                      )?                                                       # all until now is optional -> you can pass the naked ID
 168                      ([0-9A-Za-z_-]{11})                                      # here is it! the YouTube video ID
 169                      (?(1).+)?                                                # if we found the ID, everything can follow
 170                      $"""
 171     _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
 172     # Listed in order of quality
 173     _available_formats = ['38', '37', '46', '22', '45', '35', '44', '34', '18', '43', '6', '5', '36', '17', '13',
 174                           # Apple HTTP Live Streaming
 175                           '96', '95', '94', '93', '92', '132', '151',
 176                           # 3D
 177                           '85', '84', '102', '83', '101', '82', '100',
 178                           # Dash video
 179                           '138', '137', '248', '136', '247', '135', '246',
 180                           '245', '244', '134', '243', '133', '242', '160',
 181                           # Dash audio
 182                           '141', '172', '140', '171', '139',
 183                           ]
 184     _available_formats_prefer_free = ['38', '46', '37', '45', '22', '44', '35', '43', '34', '18', '6', '5', '36', '17', '13',
 185                                       # Apple HTTP Live Streaming
 186                                       '96', '95', '94', '93', '92', '132', '151',
 187                                       # 3D
 188                                       '85', '102', '84', '101', '83', '100', '82',
 189                                       # Dash video
 190                                       '138', '248', '137', '247', '136', '246', '245',
 191                                       '244', '135', '243', '134', '242', '133', '160',
 192                                       # Dash audio
 193                                       '172', '141', '171', '140', '139',
 194                                       ]
 195     _video_formats_map = {
 196         'flv': ['35', '34', '6', '5'],
 197         '3gp': ['36', '17', '13'],
 198         'mp4': ['38', '37', '22', '18'],
 199         'webm': ['46', '45', '44', '43'],
 200     }
 201     _video_extensions = {
 202         '13': '3gp',
 203         '17': '3gp',
 204         '18': 'mp4',
 205         '22': 'mp4',
 206         '36': '3gp',
 207         '37': 'mp4',
 208         '38': 'mp4',
 209         '43': 'webm',
 210         '44': 'webm',
 211         '45': 'webm',
 212         '46': 'webm',
 213
 214         # 3d videos
 215         '82': 'mp4',
 216         '83': 'mp4',
 217         '84': 'mp4',
 218         '85': 'mp4',
 219         '100': 'webm',
 220         '101': 'webm',
 221         '102': 'webm',
 222
 223         # Apple HTTP Live Streaming
 224         '92': 'mp4',
 225         '93': 'mp4',
 226         '94': 'mp4',
 227         '95': 'mp4',
 228         '96': 'mp4',
 229         '132': 'mp4',
 230         '151': 'mp4',
 231
 232         # Dash mp4
 233         '133': 'mp4',
 234         '134': 'mp4',
 235         '135': 'mp4',
 236         '136': 'mp4',
 237         '137': 'mp4',
 238         '138': 'mp4',
 239         '160': 'mp4',
 240
 241         # Dash mp4 audio
 242         '139': 'm4a',
 243         '140': 'm4a',
 244         '141': 'm4a',
 245
 246         # Dash webm
 247         '171': 'webm',
 248         '172': 'webm',
 249         '242': 'webm',
 250         '243': 'webm',
 251         '244': 'webm',
 252         '245': 'webm',
 253         '246': 'webm',
 254         '247': 'webm',
 255         '248': 'webm',
 256     }
 257     _video_dimensions = {
 258         '5': '240x400',
 259         '6': '???',
 260         '13': '???',
 261         '17': '144x176',
 262         '18': '360x640',
 263         '22': '720x1280',
 264         '34': '360x640',
 265         '35': '480x854',
 266         '36': '240x320',
 267         '37': '1080x1920',
 268         '38': '3072x4096',
 269         '43': '360x640',
 270         '44': '480x854',
 271         '45': '720x1280',
 272         '46': '1080x1920',
 273         '82': '360p',
 274         '83': '480p',
 275         '84': '720p',
 276         '85': '1080p',
 277         '92': '240p',
 278         '93': '360p',
 279         '94': '480p',
 280         '95': '720p',
 281         '96': '1080p',
 282         '100': '360p',
 283         '101': '480p',
 284         '102': '720p',
 285         '132': '240p',
 286         '151': '72p',
 287         '133': '240p',
 288         '134': '360p',
 289         '135': '480p',
 290         '136': '720p',
 291         '137': '1080p',
 292         '138': '>1080p',
 293         '139': '48k',
 294         '140': '128k',
 295         '141': '256k',
 296         '160': '192p',
 297         '171': '128k',
 298         '172': '256k',
 299         '242': '240p',
 300         '243': '360p',
 301         '244': '480p',
 302         '245': '480p',
 303         '246': '480p',
 304         '247': '720p',
 305         '248': '1080p',
 306     }
 307     _special_itags = {
 308         '82': '3D',
 309         '83': '3D',
 310         '84': '3D',
 311         '85': '3D',
 312         '100': '3D',
 313         '101': '3D',
 314         '102': '3D',
 315         '133': 'DASH Video',
 316         '134': 'DASH Video',
 317         '135': 'DASH Video',
 318         '136': 'DASH Video',
 319         '137': 'DASH Video',
 320         '138': 'DASH Video',
 321         '139': 'DASH Audio',
 322         '140': 'DASH Audio',
 323         '141': 'DASH Audio',
 324         '160': 'DASH Video',
 325         '171': 'DASH Audio',
 326         '172': 'DASH Audio',
 327         '242': 'DASH Video',
 328         '243': 'DASH Video',
 329         '244': 'DASH Video',
 330         '245': 'DASH Video',
 331         '246': 'DASH Video',
 332         '247': 'DASH Video',
 333         '248': 'DASH Video',
 334     }
 335
 336     IE_NAME = u'youtube'
 337     _TESTS = [
 338         {
 339             u"url":  u"http://www.youtube.com/watch?v=BaW_jenozKc",
 340             u"file":  u"BaW_jenozKc.mp4",
 341             u"info_dict": {
 342                 u"title": u"youtube-dl test video \"'/\\ä↭𝕐",
 343                 u"uploader": u"Philipp Hagemeister",
 344                 u"uploader_id": u"phihag",
 345                 u"upload_date": u"20121002",
 346                 u"description": u"test chars:  \"'/\\ä↭𝕐\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de ."
 347             }
 348         },
 349         {
 350             u"url":  u"http://www.youtube.com/watch?v=1ltcDfZMA3U",
 351             u"file":  u"1ltcDfZMA3U.mp4",
 352             u"note": u"Test VEVO video (#897)",
 353             u"info_dict": {
 354                 u"upload_date": u"20070518",
 355                 u"title": u"Maps - It Will Find You",
 356                 u"description": u"Music video by Maps performing It Will Find You.",
 357                 u"uploader": u"MuteUSA",
 358                 u"uploader_id": u"MuteUSA"
 359             }
 360         },
 361         {
 362             u"url":  u"http://www.youtube.com/watch?v=UxxajLWwzqY",
 363             u"file":  u"UxxajLWwzqY.mp4",
 364             u"note": u"Test generic use_cipher_signature video (#897)",
 365             u"info_dict": {
 366                 u"upload_date": u"20120506",
 367                 u"title": u"Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]",
 368                 u"description": u"md5:5b292926389560516e384ac437c0ec07",
 369                 u"uploader": u"Icona Pop",
 370                 u"uploader_id": u"IconaPop"
 371             }
 372         },
 373         {
 374             u"url":  u"https://www.youtube.com/watch?v=07FYdnEawAQ",
 375             u"file":  u"07FYdnEawAQ.mp4",
 376             u"note": u"Test VEVO video with age protection (#956)",
 377             u"info_dict": {
 378                 u"upload_date": u"20130703",
 379                 u"title": u"Justin Timberlake - Tunnel Vision (Explicit)",
 380                 u"description": u"md5:64249768eec3bc4276236606ea996373",
 381                 u"uploader": u"justintimberlakeVEVO",
 382                 u"uploader_id": u"justintimberlakeVEVO"
 383             }
 384         },
 385     ]
 386
 387
 388     @classmethod
 389     def suitable(cls, url):
 390         """Receives a URL and returns True if suitable for this IE."""
 391         if YoutubePlaylistIE.suitable(url): return False
 392         return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
 393
 394     def __init__(self, *args, **kwargs):
 395         super(YoutubeIE, self).__init__(*args, **kwargs)
 396         self._player_cache = {}
 397
 398     def report_video_webpage_download(self, video_id):
 399         """Report attempt to download video webpage."""
 400         self.to_screen(u'%s: Downloading video webpage' % video_id)
 401
 402     def report_video_info_webpage_download(self, video_id):
 403         """Report attempt to download video info webpage."""
 404         self.to_screen(u'%s: Downloading video info webpage' % video_id)
 405
 406     def report_information_extraction(self, video_id):
 407         """Report attempt to extract video information."""
 408         self.to_screen(u'%s: Extracting video information' % video_id)
 409
 410     def report_unavailable_format(self, video_id, format):
 411         """Report extracted video URL."""
 412         self.to_screen(u'%s: Format %s not available' % (video_id, format))
 413
 414     def report_rtmp_download(self):
 415         """Indicate the download will use the RTMP protocol."""
 416         self.to_screen(u'RTMP download detected')
 417
 418     def _extract_signature_function(self, video_id, player_url, slen):
 419         id_m = re.match(r'.*-(?P<id>[a-zA-Z0-9_-]+)\.(?P<ext>[a-z]+)$',
 420                         player_url)
 421         player_type = id_m.group('ext')
 422         player_id = id_m.group('id')
 423
 424         # Read from filesystem cache
 425         func_id = '%s_%s_%d' % (player_type, player_id, slen)
 426         assert os.path.basename(func_id) == func_id
 427         cache_dir = get_cachedir(self._downloader.params)
 428
 429         cache_enabled = cache_dir is not None
 430         if cache_enabled:
 431             cache_fn = os.path.join(os.path.expanduser(cache_dir),
 432                                     u'youtube-sigfuncs',
 433                                     func_id + '.json')
 434             try:
 435                 with io.open(cache_fn, 'r', encoding='utf-8') as cachef:
 436                     cache_spec = json.load(cachef)
 437                 return lambda s: u''.join(s[i] for i in cache_spec)
 438             except IOError:
 439                 pass  # No cache available
 440
 441         if player_type == 'js':
 442             code = self._download_webpage(
 443                 player_url, video_id,
 444                 note=u'Downloading %s player %s' % (player_type, player_id),
 445                 errnote=u'Download of %s failed' % player_url)
 446             res = self._parse_sig_js(code)
 447         elif player_type == 'swf':
 448             urlh = self._request_webpage(
 449                 player_url, video_id,
 450                 note=u'Downloading %s player %s' % (player_type, player_id),
 451                 errnote=u'Download of %s failed' % player_url)
 452             code = urlh.read()
 453             res = self._parse_sig_swf(code)
 454         else:
 455             assert False, 'Invalid player type %r' % player_type
 456
 457         if cache_enabled:
 458             try:
 459                 test_string = u''.join(map(compat_chr, range(slen)))
 460                 cache_res = res(test_string)
 461                 cache_spec = [ord(c) for c in cache_res]
 462                 try:
 463                     os.makedirs(os.path.dirname(cache_fn))
 464                 except OSError as ose:
 465                     if ose.errno != errno.EEXIST:
 466                         raise
 467                 write_json_file(cache_spec, cache_fn)
 468             except Exception:
 469                 tb = traceback.format_exc()
 470                 self._downloader.report_warning(
 471                     u'Writing cache to %r failed: %s' % (cache_fn, tb))
 472
 473         return res
 474
 475     def _print_sig_code(self, func, slen):
 476         def gen_sig_code(idxs):
 477             def _genslice(start, end, step):
 478                 starts = u'' if start == 0 else str(start)
 479                 ends = (u':%d' % (end+step)) if end + step >= 0 else u':'
 480                 steps = u'' if step == 1 else (u':%d' % step)
 481                 return u's[%s%s%s]' % (starts, ends, steps)
 482
 483             step = None
 484             start = '(Never used)'  # Quelch pyflakes warnings - start will be
 485                                     # set as soon as step is set
 486             for i, prev in zip(idxs[1:], idxs[:-1]):
 487                 if step is not None:
 488                     if i - prev == step:
 489                         continue
 490                     yield _genslice(start, prev, step)
 491                     step = None
 492                     continue
 493                 if i - prev in [-1, 1]:
 494                     step = i - prev
 495                     start = prev
 496                     continue
 497                 else:
 498                     yield u's[%d]' % prev
 499             if step is None:
 500                 yield u's[%d]' % i
 501             else:
 502                 yield _genslice(start, i, step)
 503
 504         test_string = u''.join(map(compat_chr, range(slen)))
 505         cache_res = func(test_string)
 506         cache_spec = [ord(c) for c in cache_res]
 507         expr_code = u' + '.join(gen_sig_code(cache_spec))
 508         code = u'if len(s) == %d:\n    return %s\n' % (slen, expr_code)
 509         self.to_screen(u'Extracted signature function:\n' + code)
 510
 511     def _parse_sig_js(self, jscode):
 512         funcname = self._search_regex(
 513             r'signature=([a-zA-Z]+)', jscode,
 514             u'Initial JS player signature function name')
 515
 516         functions = {}
 517
 518         def argidx(varname):
 519             return string.lowercase.index(varname)
 520
 521         def interpret_statement(stmt, local_vars, allow_recursion=20):
 522             if allow_recursion < 0:
 523                 raise ExtractorError(u'Recursion limit reached')
 524
 525             if stmt.startswith(u'var '):
 526                 stmt = stmt[len(u'var '):]
 527             ass_m = re.match(r'^(?P<out>[a-z]+)(?:\[(?P<index>[^\]]+)\])?' +
 528                              r'=(?P<expr>.*)$', stmt)
 529             if ass_m:
 530                 if ass_m.groupdict().get('index'):
 531                     def assign(val):
 532                         lvar = local_vars[ass_m.group('out')]
 533                         idx = interpret_expression(ass_m.group('index'),
 534                                                    local_vars, allow_recursion)
 535                         assert isinstance(idx, int)
 536                         lvar[idx] = val
 537                         return val
 538                     expr = ass_m.group('expr')
 539                 else:
 540                     def assign(val):
 541                         local_vars[ass_m.group('out')] = val
 542                         return val
 543                     expr = ass_m.group('expr')
 544             elif stmt.startswith(u'return '):
 545                 assign = lambda v: v
 546                 expr = stmt[len(u'return '):]
 547             else:
 548                 raise ExtractorError(
 549                     u'Cannot determine left side of statement in %r' % stmt)
 550
 551             v = interpret_expression(expr, local_vars, allow_recursion)
 552             return assign(v)
 553
 554         def interpret_expression(expr, local_vars, allow_recursion):
 555             if expr.isdigit():
 556                 return int(expr)
 557
 558             if expr.isalpha():
 559                 return local_vars[expr]
 560
 561             m = re.match(r'^(?P<in>[a-z]+)\.(?P<member>.*)$', expr)
 562             if m:
 563                 member = m.group('member')
 564                 val = local_vars[m.group('in')]
 565                 if member == 'split("")':
 566                     return list(val)
 567                 if member == 'join("")':
 568                     return u''.join(val)
 569                 if member == 'length':
 570                     return len(val)
 571                 if member == 'reverse()':
 572                     return val[::-1]
 573                 slice_m = re.match(r'slice\((?P<idx>.*)\)', member)
 574                 if slice_m:
 575                     idx = interpret_expression(
 576                         slice_m.group('idx'), local_vars, allow_recursion-1)
 577                     return val[idx:]
 578
 579             m = re.match(
 580                 r'^(?P<in>[a-z]+)\[(?P<idx>.+)\]$', expr)
 581             if m:
 582                 val = local_vars[m.group('in')]
 583                 idx = interpret_expression(m.group('idx'), local_vars,
 584                                            allow_recursion-1)
 585                 return val[idx]
 586
 587             m = re.match(r'^(?P<a>.+?)(?P<op>[%])(?P<b>.+?)$', expr)
 588             if m:
 589                 a = interpret_expression(m.group('a'),
 590                                          local_vars, allow_recursion)
 591                 b = interpret_expression(m.group('b'),
 592                                          local_vars, allow_recursion)
 593                 return a % b
 594
 595             m = re.match(
 596                 r'^(?P<func>[a-zA-Z]+)\((?P<args>[a-z0-9,]+)\)$', expr)
 597             if m:
 598                 fname = m.group('func')
 599                 if fname not in functions:
 600                     functions[fname] = extract_function(fname)
 601                 argvals = [int(v) if v.isdigit() else local_vars[v]
 602                            for v in m.group('args').split(',')]
 603                 return functions[fname](argvals)
 604             raise ExtractorError(u'Unsupported JS expression %r' % expr)
 605
 606         def extract_function(funcname):
 607             func_m = re.search(
 608                 r'function ' + re.escape(funcname) +
 609                 r'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}',
 610                 jscode)
 611             argnames = func_m.group('args').split(',')
 612
 613             def resf(args):
 614                 local_vars = dict(zip(argnames, args))
 615                 for stmt in func_m.group('code').split(';'):
 616                     res = interpret_statement(stmt, local_vars)
 617                 return res
 618             return resf
 619
 620         initial_function = extract_function(funcname)
 621         return lambda s: initial_function([s])
 622
 623     def _parse_sig_swf(self, file_contents):
 624         if file_contents[1:3] != b'WS':
 625             raise ExtractorError(
 626                 u'Not an SWF file; header is %r' % file_contents[:3])
 627         if file_contents[:1] == b'C':
 628             content = zlib.decompress(file_contents[8:])
 629         else:
 630             raise NotImplementedError(u'Unsupported compression format %r' %
 631                                       file_contents[:1])
 632
 633         def extract_tags(content):
 634             pos = 0
 635             while pos < len(content):
 636                 header16 = struct.unpack('<H', content[pos:pos+2])[0]
 637                 pos += 2
 638                 tag_code = header16 >> 6
 639                 tag_len = header16 & 0x3f
 640                 if tag_len == 0x3f:
 641                     tag_len = struct.unpack('<I', content[pos:pos+4])[0]
 642                     pos += 4
 643                 assert pos+tag_len <= len(content)
 644                 yield (tag_code, content[pos:pos+tag_len])
 645                 pos += tag_len
 646
 647         code_tag = next(tag
 648                         for tag_code, tag in extract_tags(content)
 649                         if tag_code == 82)
 650         p = code_tag.index(b'\0', 4) + 1
 651         code_reader = io.BytesIO(code_tag[p:])
 652
 653         # Parse ABC (AVM2 ByteCode)
 654         def read_int(reader=None):
 655             if reader is None:
 656                 reader = code_reader
 657             res = 0
 658             shift = 0
 659             for _ in range(5):
 660                 buf = reader.read(1)
 661                 assert len(buf) == 1
 662                 b = struct.unpack('<B', buf)[0]
 663                 res = res | ((b & 0x7f) << shift)
 664                 if b & 0x80 == 0:
 665                     break
 666                 shift += 7
 667             return res
 668
 669         def u30(reader=None):
 670             res = read_int(reader)
 671             assert res & 0xf0000000 == 0
 672             return res
 673         u32 = read_int
 674
 675         def s32(reader=None):
 676             v = read_int(reader)
 677             if v & 0x80000000 != 0:
 678                 v = - ((v ^ 0xffffffff) + 1)
 679             return v
 680
 681         def read_string(reader=None):
 682             if reader is None:
 683                 reader = code_reader
 684             slen = u30(reader)
 685             resb = reader.read(slen)
 686             assert len(resb) == slen
 687             return resb.decode('utf-8')
 688
 689         def read_bytes(count, reader=None):
 690             if reader is None:
 691                 reader = code_reader
 692             resb = reader.read(count)
 693             assert len(resb) == count
 694             return resb
 695
 696         def read_byte(reader=None):
 697             resb = read_bytes(1, reader=reader)
 698             res = struct.unpack('<B', resb)[0]
 699             return res
 700
 701         # minor_version + major_version
 702         read_bytes(2 + 2)
 703
 704         # Constant pool
 705         int_count = u30()
 706         for _c in range(1, int_count):
 707             s32()
 708         uint_count = u30()
 709         for _c in range(1, uint_count):
 710             u32()
 711         double_count = u30()
 712         read_bytes((double_count-1) * 8)
 713         string_count = u30()
 714         constant_strings = [u'']
 715         for _c in range(1, string_count):
 716             s = read_string()
 717             constant_strings.append(s)
 718         namespace_count = u30()
 719         for _c in range(1, namespace_count):
 720             read_bytes(1)  # kind
 721             u30()  # name
 722         ns_set_count = u30()
 723         for _c in range(1, ns_set_count):
 724             count = u30()
 725             for _c2 in range(count):
 726                 u30()
 727         multiname_count = u30()
 728         MULTINAME_SIZES = {
 729             0x07: 2,  # QName
 730             0x0d: 2,  # QNameA
 731             0x0f: 1,  # RTQName
 732             0x10: 1,  # RTQNameA
 733             0x11: 0,  # RTQNameL
 734             0x12: 0,  # RTQNameLA
 735             0x09: 2,  # Multiname
 736             0x0e: 2,  # MultinameA
 737             0x1b: 1,  # MultinameL
 738             0x1c: 1,  # MultinameLA
 739         }
 740         multinames = [u'']
 741         for _c in range(1, multiname_count):
 742             kind = u30()
 743             assert kind in MULTINAME_SIZES, u'Invalid multiname kind %r' % kind
 744             if kind == 0x07:
 745                 u30()  # namespace_idx
 746                 name_idx = u30()
 747                 multinames.append(constant_strings[name_idx])
 748             else:
 749                 multinames.append('[MULTINAME kind: %d]' % kind)
 750                 for _c2 in range(MULTINAME_SIZES[kind]):
 751                     u30()
 752
 753         # Methods
 754         method_count = u30()
 755         MethodInfo = collections.namedtuple(
 756             'MethodInfo',
 757             ['NEED_ARGUMENTS', 'NEED_REST'])
 758         method_infos = []
 759         for method_id in range(method_count):
 760             param_count = u30()
 761             u30()  # return type
 762             for _ in range(param_count):
 763                 u30()  # param type
 764             u30()  # name index (always 0 for youtube)
 765             flags = read_byte()
 766             if flags & 0x08 != 0:
 767                 # Options present
 768                 option_count = u30()
 769                 for c in range(option_count):
 770                     u30()  # val
 771                     read_bytes(1)  # kind
 772             if flags & 0x80 != 0:
 773                 # Param names present
 774                 for _ in range(param_count):
 775                     u30()  # param name
 776             mi = MethodInfo(flags & 0x01 != 0, flags & 0x04 != 0)
 777             method_infos.append(mi)
 778
 779         # Metadata
 780         metadata_count = u30()
 781         for _c in range(metadata_count):
 782             u30()  # name
 783             item_count = u30()
 784             for _c2 in range(item_count):
 785                 u30()  # key
 786                 u30()  # value
 787
 788         def parse_traits_info():
 789             trait_name_idx = u30()
 790             kind_full = read_byte()
 791             kind = kind_full & 0x0f
 792             attrs = kind_full >> 4
 793             methods = {}
 794             if kind in [0x00, 0x06]:  # Slot or Const
 795                 u30()  # Slot id
 796                 u30()  # type_name_idx
 797                 vindex = u30()
 798                 if vindex != 0:
 799                     read_byte()  # vkind
 800             elif kind in [0x01, 0x02, 0x03]:  # Method / Getter / Setter
 801                 u30()  # disp_id
 802                 method_idx = u30()
 803                 methods[multinames[trait_name_idx]] = method_idx
 804             elif kind == 0x04:  # Class
 805                 u30()  # slot_id
 806                 u30()  # classi
 807             elif kind == 0x05:  # Function
 808                 u30()  # slot_id
 809                 function_idx = u30()
 810                 methods[function_idx] = multinames[trait_name_idx]
 811             else:
 812                 raise ExtractorError(u'Unsupported trait kind %d' % kind)
 813
 814             if attrs & 0x4 != 0:  # Metadata present
 815                 metadata_count = u30()
 816                 for _c3 in range(metadata_count):
 817                     u30()  # metadata index
 818
 819             return methods
 820
 821         # Classes
 822         TARGET_CLASSNAME = u'SignatureDecipher'
 823         searched_idx = multinames.index(TARGET_CLASSNAME)
 824         searched_class_id = None
 825         class_count = u30()
 826         for class_id in range(class_count):
 827             name_idx = u30()
 828             if name_idx == searched_idx:
 829                 # We found the class we're looking for!
 830                 searched_class_id = class_id
 831             u30()  # super_name idx
 832             flags = read_byte()
 833             if flags & 0x08 != 0:  # Protected namespace is present
 834                 u30()  # protected_ns_idx
 835             intrf_count = u30()
 836             for _c2 in range(intrf_count):
 837                 u30()
 838             u30()  # iinit
 839             trait_count = u30()
 840             for _c2 in range(trait_count):
 841                 parse_traits_info()
 842
 843         if searched_class_id is None:
 844             raise ExtractorError(u'Target class %r not found' %
 845                                  TARGET_CLASSNAME)
 846
 847         method_names = {}
 848         method_idxs = {}
 849         for class_id in range(class_count):
 850             u30()  # cinit
 851             trait_count = u30()
 852             for _c2 in range(trait_count):
 853                 trait_methods = parse_traits_info()
 854                 if class_id == searched_class_id:
 855                     method_names.update(trait_methods.items())
 856                     method_idxs.update(dict(
 857                         (idx, name)
 858                         for name, idx in trait_methods.items()))
 859
 860         # Scripts
 861         script_count = u30()
 862         for _c in range(script_count):
 863             u30()  # init
 864             trait_count = u30()
 865             for _c2 in range(trait_count):
 866                 parse_traits_info()
 867
 868         # Method bodies
 869         method_body_count = u30()
 870         Method = collections.namedtuple('Method', ['code', 'local_count'])
 871         methods = {}
 872         for _c in range(method_body_count):
 873             method_idx = u30()
 874             u30()  # max_stack
 875             local_count = u30()
 876             u30()  # init_scope_depth
 877             u30()  # max_scope_depth
 878             code_length = u30()
 879             code = read_bytes(code_length)
 880             if method_idx in method_idxs:
 881                 m = Method(code, local_count)
 882                 methods[method_idxs[method_idx]] = m
 883             exception_count = u30()
 884             for _c2 in range(exception_count):
 885                 u30()  # from
 886                 u30()  # to
 887                 u30()  # target
 888                 u30()  # exc_type
 889                 u30()  # var_name
 890             trait_count = u30()
 891             for _c2 in range(trait_count):
 892                 parse_traits_info()
 893
 894         assert p + code_reader.tell() == len(code_tag)
 895         assert len(methods) == len(method_idxs)
 896
 897         method_pyfunctions = {}
 898
 899         def extract_function(func_name):
 900             if func_name in method_pyfunctions:
 901                 return method_pyfunctions[func_name]
 902             if func_name not in methods:
 903                 raise ExtractorError(u'Cannot find function %r' % func_name)
 904             m = methods[func_name]
 905
 906             def resfunc(args):
 907                 registers = ['(this)'] + list(args) + [None] * m.local_count
 908                 stack = []
 909                 coder = io.BytesIO(m.code)
 910                 while True:
 911                     opcode = struct.unpack('!B', coder.read(1))[0]
 912                     if opcode == 36:  # pushbyte
 913                         v = struct.unpack('!B', coder.read(1))[0]
 914                         stack.append(v)
 915                     elif opcode == 44:  # pushstring
 916                         idx = u30(coder)
 917                         stack.append(constant_strings[idx])
 918                     elif opcode == 48:  # pushscope
 919                         # We don't implement the scope register, so we'll just
 920                         # ignore the popped value
 921                         stack.pop()
 922                     elif opcode == 70:  # callproperty
 923                         index = u30(coder)
 924                         mname = multinames[index]
 925                         arg_count = u30(coder)
 926                         args = list(reversed(
 927                             [stack.pop() for _ in range(arg_count)]))
 928                         obj = stack.pop()
 929                         if mname == u'split':
 930                             assert len(args) == 1
 931                             assert isinstance(args[0], compat_str)
 932                             assert isinstance(obj, compat_str)
 933                             if args[0] == u'':
 934                                 res = list(obj)
 935                             else:
 936                                 res = obj.split(args[0])
 937                             stack.append(res)
 938                         elif mname == u'slice':
 939                             assert len(args) == 1
 940                             assert isinstance(args[0], int)
 941                             assert isinstance(obj, list)
 942                             res = obj[args[0]:]
 943                             stack.append(res)
 944                         elif mname == u'join':
 945                             assert len(args) == 1
 946                             assert isinstance(args[0], compat_str)
 947                             assert isinstance(obj, list)
 948                             res = args[0].join(obj)
 949                             stack.append(res)
 950                         elif mname in method_pyfunctions:
 951                             stack.append(method_pyfunctions[mname](args))
 952                         else:
 953                             raise NotImplementedError(
 954                                 u'Unsupported property %r on %r'
 955                                 % (mname, obj))
 956                     elif opcode == 72:  # returnvalue
 957                         res = stack.pop()
 958                         return res
 959                     elif opcode == 79:  # callpropvoid
 960                         index = u30(coder)
 961                         mname = multinames[index]
 962                         arg_count = u30(coder)
 963                         args = list(reversed(
 964                             [stack.pop() for _ in range(arg_count)]))
 965                         obj = stack.pop()
 966                         if mname == u'reverse':
 967                             assert isinstance(obj, list)
 968                             obj.reverse()
 969                         else:
 970                             raise NotImplementedError(
 971                                 u'Unsupported (void) property %r on %r'
 972                                 % (mname, obj))
 973                     elif opcode == 93:  # findpropstrict
 974                         index = u30(coder)
 975                         mname = multinames[index]
 976                         res = extract_function(mname)
 977                         stack.append(res)
 978                     elif opcode == 97:  # setproperty
 979                         index = u30(coder)
 980                         value = stack.pop()
 981                         idx = stack.pop()
 982                         obj = stack.pop()
 983                         assert isinstance(obj, list)
 984                         assert isinstance(idx, int)
 985                         obj[idx] = value
 986                     elif opcode == 98:  # getlocal
 987                         index = u30(coder)
 988                         stack.append(registers[index])
 989                     elif opcode == 99:  # setlocal
 990                         index = u30(coder)
 991                         value = stack.pop()
 992                         registers[index] = value
 993                     elif opcode == 102:  # getproperty
 994                         index = u30(coder)
 995                         pname = multinames[index]
 996                         if pname == u'length':
 997                             obj = stack.pop()
 998                             assert isinstance(obj, list)
 999                             stack.append(len(obj))
1000                         else:  # Assume attribute access
1001                             idx = stack.pop()
1002                             assert isinstance(idx, int)
1003                             obj = stack.pop()
1004                             assert isinstance(obj, list)
1005                             stack.append(obj[idx])
1006                     elif opcode == 128:  # coerce
1007                         u30(coder)
1008                     elif opcode == 133:  # coerce_s
1009                         assert isinstance(stack[-1], (type(None), compat_str))
1010                     elif opcode == 164:  # modulo
1011                         value2 = stack.pop()
1012                         value1 = stack.pop()
1013                         res = value1 % value2
1014                         stack.append(res)
1015                     elif opcode == 208:  # getlocal_0
1016                         stack.append(registers[0])
1017                     elif opcode == 209:  # getlocal_1
1018                         stack.append(registers[1])
1019                     elif opcode == 210:  # getlocal_2
1020                         stack.append(registers[2])
1021                     elif opcode == 211:  # getlocal_3
1022                         stack.append(registers[3])
1023                     elif opcode == 214:  # setlocal_2
1024                         registers[2] = stack.pop()
1025                     elif opcode == 215:  # setlocal_3
1026                         registers[3] = stack.pop()
1027                     else:
1028                         raise NotImplementedError(
1029                             u'Unsupported opcode %d' % opcode)
1030
1031             method_pyfunctions[func_name] = resfunc
1032             return resfunc
1033
1034         initial_function = extract_function(u'decipher')
1035         return lambda s: initial_function([s])
1036
1037     def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
1038         """Turn the encrypted s field into a working signature"""
1039
1040         if player_url is not None:
1041             try:
1042                 player_id = (player_url, len(s))
1043                 if player_id not in self._player_cache:
1044                     func = self._extract_signature_function(
1045                         video_id, player_url, len(s)
1046                     )
1047                     self._player_cache[player_id] = func
1048                 func = self._player_cache[player_id]
1049                 if self._downloader.params.get('youtube_print_sig_code'):
1050                     self._print_sig_code(func, len(s))
1051                 return func(s)
1052             except Exception:
1053                 tb = traceback.format_exc()
1054                 self._downloader.report_warning(
1055                     u'Automatic signature extraction failed: ' + tb)
1056
1057             self._downloader.report_warning(
1058                 u'Warning: Falling back to static signature algorithm')
1059
1060         return self._static_decrypt_signature(
1061             s, video_id, player_url, age_gate)
1062
1063     def _static_decrypt_signature(self, s, video_id, player_url, age_gate):
1064         if age_gate:
1065             # The videos with age protection use another player, so the
1066             # algorithms can be different.
1067             if len(s) == 86:
1068                 return s[2:63] + s[82] + s[64:82] + s[63]
1069
1070         if len(s) == 93:
1071             return s[86:29:-1] + s[88] + s[28:5:-1]
1072         elif len(s) == 92:
1073             return s[25] + s[3:25] + s[0] + s[26:42] + s[79] + s[43:79] + s[91] + s[80:83]
1074         elif len(s) == 91:
1075             return s[84:27:-1] + s[86] + s[26:5:-1]
1076         elif len(s) == 90:
1077             return s[25] + s[3:25] + s[2] + s[26:40] + s[77] + s[41:77] + s[89] + s[78:81]
1078         elif len(s) == 89:
1079             return s[84:78:-1] + s[87] + s[77:60:-1] + s[0] + s[59:3:-1]
1080         elif len(s) == 88:
1081             return s[7:28] + s[87] + s[29:45] + s[55] + s[46:55] + s[2] + s[56:87] + s[28]
1082         elif len(s) == 87:
1083             return s[6:27] + s[4] + s[28:39] + s[27] + s[40:59] + s[2] + s[60:]
1084         elif len(s) == 86:
1085             return s[80:72:-1] + s[16] + s[71:39:-1] + s[72] + s[38:16:-1] + s[82] + s[15::-1]
1086         elif len(s) == 85:
1087             return s[3:11] + s[0] + s[12:55] + s[84] + s[56:84]
1088         elif len(s) == 84:
1089             return s[78:70:-1] + s[14] + s[69:37:-1] + s[70] + s[36:14:-1] + s[80] + s[:14][::-1]
1090         elif len(s) == 83:
1091             return s[80:63:-1] + s[0] + s[62:0:-1] + s[63]
1092         elif len(s) == 82:
1093             return s[80:37:-1] + s[7] + s[36:7:-1] + s[0] + s[6:0:-1] + s[37]
1094         elif len(s) == 81:
1095             return s[56] + s[79:56:-1] + s[41] + s[55:41:-1] + s[80] + s[40:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
1096         elif len(s) == 80:
1097             return s[1:19] + s[0] + s[20:68] + s[19] + s[69:80]
1098         elif len(s) == 79:
1099             return s[54] + s[77:54:-1] + s[39] + s[53:39:-1] + s[78] + s[38:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
1100
1101         else:
1102             raise ExtractorError(u'Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s)))
1103
1104     def _get_available_subtitles(self, video_id):
1105         try:
1106             sub_list = self._download_webpage(
1107                 'http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
1108                 video_id, note=False)
1109         except ExtractorError as err:
1110             self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err))
1111             return {}
1112         lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list)
1113
1114         sub_lang_list = {}
1115         for l in lang_list:
1116             lang = l[1]
1117             params = compat_urllib_parse.urlencode({
1118                 'lang': lang,
1119                 'v': video_id,
1120                 'fmt': self._downloader.params.get('subtitlesformat'),
1121                 'name': l[0],
1122             })
1123             url = u'http://www.youtube.com/api/timedtext?' + params
1124             sub_lang_list[lang] = url
1125         if not sub_lang_list:
1126             self._downloader.report_warning(u'video doesn\'t have subtitles')
1127             return {}
1128         return sub_lang_list
1129
1130     def _get_available_automatic_caption(self, video_id, webpage):
1131         """We need the webpage for getting the captions url, pass it as an
1132            argument to speed up the process."""
1133         sub_format = self._downloader.params.get('subtitlesformat')
1134         self.to_screen(u'%s: Looking for automatic captions' % video_id)
1135         mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
1136         err_msg = u'Couldn\'t find automatic captions for %s' % video_id
1137         if mobj is None:
1138             self._downloader.report_warning(err_msg)
1139             return {}
1140         player_config = json.loads(mobj.group(1))
1141         try:
1142             args = player_config[u'args']
1143             caption_url = args[u'ttsurl']
1144             timestamp = args[u'timestamp']
1145             # We get the available subtitles
1146             list_params = compat_urllib_parse.urlencode({
1147                 'type': 'list',
1148                 'tlangs': 1,
1149                 'asrs': 1,
1150             })
1151             list_url = caption_url + '&' + list_params
1152             list_page = self._download_webpage(list_url, video_id)
1153             caption_list = xml.etree.ElementTree.fromstring(list_page.encode('utf-8'))
1154             original_lang_node = caption_list.find('track')
1155             if original_lang_node is None or original_lang_node.attrib.get('kind') != 'asr' :
1156                 self._downloader.report_warning(u'Video doesn\'t have automatic captions')
1157                 return {}
1158             original_lang = original_lang_node.attrib['lang_code']
1159
1160             sub_lang_list = {}
1161             for lang_node in caption_list.findall('target'):
1162                 sub_lang = lang_node.attrib['lang_code']
1163                 params = compat_urllib_parse.urlencode({
1164                     'lang': original_lang,
1165                     'tlang': sub_lang,
1166                     'fmt': sub_format,
1167                     'ts': timestamp,
1168                     'kind': 'asr',
1169                 })
1170                 sub_lang_list[sub_lang] = caption_url + '&' + params
1171             return sub_lang_list
1172         # An extractor error can be raise by the download process if there are
1173         # no automatic captions but there are subtitles
1174         except (KeyError, ExtractorError):
1175             self._downloader.report_warning(err_msg)
1176             return {}
1177
1178     def _print_formats(self, formats):
1179         print('Available formats:')
1180         for x in formats:
1181             print('%s\t:\t%s\t[%s]%s' %(x, self._video_extensions.get(x, 'flv'),
1182                                         self._video_dimensions.get(x, '???'),
1183                                         ' ('+self._special_itags[x]+')' if x in self._special_itags else ''))
1184
1185     def _extract_id(self, url):
1186         mobj = re.match(self._VALID_URL, url, re.VERBOSE)
1187         if mobj is None:
1188             raise ExtractorError(u'Invalid URL: %s' % url)
1189         video_id = mobj.group(2)
1190         return video_id
1191
1192     def _get_video_url_list(self, url_map):
1193         """
1194         Transform a dictionary in the format {itag:url} to a list of (itag, url)
1195         with the requested formats.
1196         """
1197         req_format = self._downloader.params.get('format', None)
1198         format_limit = self._downloader.params.get('format_limit', None)
1199         available_formats = self._available_formats_prefer_free if self._downloader.params.get('prefer_free_formats', False) else self._available_formats
1200         if format_limit is not None and format_limit in available_formats:
1201             format_list = available_formats[available_formats.index(format_limit):]
1202         else:
1203             format_list = available_formats
1204         existing_formats = [x for x in format_list if x in url_map]
1205         if len(existing_formats) == 0:
1206             raise ExtractorError(u'no known formats available for video')
1207         if self._downloader.params.get('listformats', None):
1208             self._print_formats(existing_formats)
1209             return
1210         if req_format is None or req_format == 'best':
1211             video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
1212         elif req_format == 'worst':
1213             video_url_list = [(existing_formats[-1], url_map[existing_formats[-1]])] # worst quality
1214         elif req_format in ('-1', 'all'):
1215             video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
1216         else:
1217             # Specific formats. We pick the first in a slash-delimeted sequence.
1218             # Format can be specified as itag or 'mp4' or 'flv' etc. We pick the highest quality
1219             # available in the specified format. For example,
1220             # if '1/2/3/4' is requested and '2' and '4' are available, we pick '2'.
1221             # if '1/mp4/3/4' is requested and '1' and '5' (is a mp4) are available, we pick '1'.
1222             # if '1/mp4/3/4' is requested and '4' and '5' (is a mp4) are available, we pick '5'.
1223             req_formats = req_format.split('/')
1224             video_url_list = None
1225             for rf in req_formats:
1226                 if rf in url_map:
1227                     video_url_list = [(rf, url_map[rf])]
1228                     break
1229                 if rf in self._video_formats_map:
1230                     for srf in self._video_formats_map[rf]:
1231                         if srf in url_map:
1232                             video_url_list = [(srf, url_map[srf])]
1233                             break
1234                     else:
1235                         continue
1236                     break
1237             if video_url_list is None:
1238                 raise ExtractorError(u'requested format not available')
1239         return video_url_list
1240
1241     def _extract_from_m3u8(self, manifest_url, video_id):
1242         url_map = {}
1243         def _get_urls(_manifest):
1244             lines = _manifest.split('\n')
1245             urls = filter(lambda l: l and not l.startswith('#'),
1246                             lines)
1247             return urls
1248         manifest = self._download_webpage(manifest_url, video_id, u'Downloading formats manifest')
1249         formats_urls = _get_urls(manifest)
1250         for format_url in formats_urls:
1251             itag = self._search_regex(r'itag/(\d+?)/', format_url, 'itag')
1252             url_map[itag] = format_url
1253         return url_map
1254
1255     def _extract_annotations(self, video_id):
1256         url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id
1257         return self._download_webpage(url, video_id, note=u'Searching for annotations.', errnote=u'Unable to download video annotations.')
1258
1259     def _real_extract(self, url):
1260         # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1261         mobj = re.search(self._NEXT_URL_RE, url)
1262         if mobj:
1263             url = 'https://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/')
1264         video_id = self._extract_id(url)
1265
1266         # Get video webpage
1267         self.report_video_webpage_download(video_id)
1268         url = 'https://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
1269         request = compat_urllib_request.Request(url)
1270         try:
1271             video_webpage_bytes = compat_urllib_request.urlopen(request).read()
1272         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1273             raise ExtractorError(u'Unable to download video webpage: %s' % compat_str(err))
1274
1275         video_webpage = video_webpage_bytes.decode('utf-8', 'ignore')
1276
1277         # Attempt to extract SWF player URL
1278         mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
1279         if mobj is not None:
1280             player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1281         else:
1282             player_url = None
1283
1284         # Get video info
1285         self.report_video_info_webpage_download(video_id)
1286         if re.search(r'player-age-gate-content">', video_webpage) is not None:
1287             self.report_age_confirmation()
1288             age_gate = True
1289             # We simulate the access to the video from www.youtube.com/v/{video_id}
1290             # this can be viewed without login into Youtube
1291             data = compat_urllib_parse.urlencode({'video_id': video_id,
1292                                                   'el': 'embedded',
1293                                                   'gl': 'US',
1294                                                   'hl': 'en',
1295                                                   'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1296                                                   'asv': 3,
1297                                                   'sts':'1588',
1298                                                   })
1299             video_info_url = 'https://www.youtube.com/get_video_info?' + data
1300             video_info_webpage = self._download_webpage(video_info_url, video_id,
1301                                     note=False,
1302                                     errnote='unable to download video info webpage')
1303             video_info = compat_parse_qs(video_info_webpage)
1304         else:
1305             age_gate = False
1306             for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
1307                 video_info_url = ('https://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
1308                         % (video_id, el_type))
1309                 video_info_webpage = self._download_webpage(video_info_url, video_id,
1310                                         note=False,
1311                                         errnote='unable to download video info webpage')
1312                 video_info = compat_parse_qs(video_info_webpage)
1313                 if 'token' in video_info:
1314                     break
1315         if 'token' not in video_info:
1316             if 'reason' in video_info:
1317                 raise ExtractorError(u'YouTube said: %s' % video_info['reason'][0], expected=True)
1318             else:
1319                 raise ExtractorError(u'"token" parameter not in video info for unknown reason')
1320
1321         # Check for "rental" videos
1322         if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
1323             raise ExtractorError(u'"rental" videos not supported')
1324
1325         # Start extracting information
1326         self.report_information_extraction(video_id)
1327
1328         # uploader
1329         if 'author' not in video_info:
1330             raise ExtractorError(u'Unable to extract uploader name')
1331         video_uploader = compat_urllib_parse.unquote_plus(video_info['author'][0])
1332
1333         # uploader_id
1334         video_uploader_id = None
1335         mobj = re.search(r'<link itemprop="url" href="http://www.youtube.com/(?:user|channel)/([^"]+)">', video_webpage)
1336         if mobj is not None:
1337             video_uploader_id = mobj.group(1)
1338         else:
1339             self._downloader.report_warning(u'unable to extract uploader nickname')
1340
1341         # title
1342         if 'title' in video_info:
1343             video_title = compat_urllib_parse.unquote_plus(video_info['title'][0])
1344         else:
1345             self._downloader.report_warning(u'Unable to extract video title')
1346             video_title = u'_'
1347
1348         # thumbnail image
1349         # We try first to get a high quality image:
1350         m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
1351                             video_webpage, re.DOTALL)
1352         if m_thumb is not None:
1353             video_thumbnail = m_thumb.group(1)
1354         elif 'thumbnail_url' not in video_info:
1355             self._downloader.report_warning(u'unable to extract video thumbnail')
1356             video_thumbnail = None
1357         else:   # don't panic if we can't find it
1358             video_thumbnail = compat_urllib_parse.unquote_plus(video_info['thumbnail_url'][0])
1359
1360         # upload date
1361         upload_date = None
1362         mobj = re.search(r'id="eow-date.*?>(.*?)</span>', video_webpage, re.DOTALL)
1363         if mobj is not None:
1364             upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
1365             upload_date = unified_strdate(upload_date)
1366
1367         # description
1368         video_description = get_element_by_id("eow-description", video_webpage)
1369         if video_description:
1370             video_description = clean_html(video_description)
1371         else:
1372             fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
1373             if fd_mobj:
1374                 video_description = unescapeHTML(fd_mobj.group(1))
1375             else:
1376                 video_description = u''
1377
1378         # subtitles
1379         video_subtitles = self.extract_subtitles(video_id, video_webpage)
1380
1381         if self._downloader.params.get('listsubtitles', False):
1382             self._list_available_subtitles(video_id, video_webpage)
1383             return
1384
1385         if 'length_seconds' not in video_info:
1386             self._downloader.report_warning(u'unable to extract video duration')
1387             video_duration = ''
1388         else:
1389             video_duration = compat_urllib_parse.unquote_plus(video_info['length_seconds'][0])
1390
1391         # annotations
1392         video_annotations = None
1393         if self._downloader.params.get('writeannotations', False):
1394                 video_annotations = self._extract_annotations(video_id)
1395
1396         # Decide which formats to download
1397
1398         try:
1399             mobj = re.search(r';ytplayer.config = ({.*?});', video_webpage)
1400             if not mobj:
1401                 raise ValueError('Could not find vevo ID')
1402             info = json.loads(mobj.group(1))
1403             args = info['args']
1404             # Easy way to know if the 's' value is in url_encoded_fmt_stream_map
1405             # this signatures are encrypted
1406             if 'url_encoded_fmt_stream_map' not in args:
1407                 raise ValueError(u'No stream_map present')  # caught below
1408             re_signature = re.compile(r'[&,]s=')
1409             m_s = re_signature.search(args['url_encoded_fmt_stream_map'])
1410             if m_s is not None:
1411                 self.to_screen(u'%s: Encrypted signatures detected.' % video_id)
1412                 video_info['url_encoded_fmt_stream_map'] = [args['url_encoded_fmt_stream_map']]
1413             m_s = re_signature.search(args.get('adaptive_fmts', u''))
1414             if m_s is not None:
1415                 if 'adaptive_fmts' in video_info:
1416                     video_info['adaptive_fmts'][0] += ',' + args['adaptive_fmts']
1417                 else:
1418                     video_info['adaptive_fmts'] = [args['adaptive_fmts']]
1419         except ValueError:
1420             pass
1421
1422         if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1423             self.report_rtmp_download()
1424             video_url_list = [(None, video_info['conn'][0])]
1425         elif len(video_info.get('url_encoded_fmt_stream_map', [])) >= 1 or len(video_info.get('adaptive_fmts', [])) >= 1:
1426             encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts',[''])[0]
1427             if 'rtmpe%3Dyes' in encoded_url_map:
1428                 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
1429             url_map = {}
1430             for url_data_str in encoded_url_map.split(','):
1431                 url_data = compat_parse_qs(url_data_str)
1432                 if 'itag' in url_data and 'url' in url_data:
1433                     url = url_data['url'][0]
1434                     if 'sig' in url_data:
1435                         url += '&signature=' + url_data['sig'][0]
1436                     elif 's' in url_data:
1437                         encrypted_sig = url_data['s'][0]
1438                         if self._downloader.params.get('verbose'):
1439                             if age_gate:
1440                                 if player_url is None:
1441                                     player_version = 'unknown'
1442                                 else:
1443                                     player_version = self._search_regex(
1444                                         r'-(.+)\.swf$', player_url,
1445                                         u'flash player', fatal=False)
1446                                 player_desc = 'flash player %s' % player_version
1447                             else:
1448                                 player_version = self._search_regex(
1449                                     r'html5player-(.+?)\.js', video_webpage,
1450                                     'html5 player', fatal=False)
1451                                 player_desc = u'html5 player %s' % player_version
1452
1453                             parts_sizes = u'.'.join(compat_str(len(part)) for part in encrypted_sig.split('.'))
1454                             self.to_screen(u'encrypted signature length %d (%s), itag %s, %s' %
1455                                 (len(encrypted_sig), parts_sizes, url_data['itag'][0], player_desc))
1456
1457                         if not age_gate:
1458                             jsplayer_url_json = self._search_regex(
1459                                 r'"assets":.+?"js":\s*("[^"]+")',
1460                                 video_webpage, u'JS player URL')
1461                             player_url = json.loads(jsplayer_url_json)
1462
1463                         signature = self._decrypt_signature(
1464                             encrypted_sig, video_id, player_url, age_gate)
1465                         url += '&signature=' + signature
1466                     if 'ratebypass' not in url:
1467                         url += '&ratebypass=yes'
1468                     url_map[url_data['itag'][0]] = url
1469             video_url_list = self._get_video_url_list(url_map)
1470             if not video_url_list:
1471                 return
1472         elif video_info.get('hlsvp'):
1473             manifest_url = video_info['hlsvp'][0]
1474             url_map = self._extract_from_m3u8(manifest_url, video_id)
1475             video_url_list = self._get_video_url_list(url_map)
1476             if not video_url_list:
1477                 return
1478
1479         else:
1480             raise ExtractorError(u'no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
1481
1482         results = []
1483         for itag, video_real_url in video_url_list:
1484             # Extension
1485             video_extension = self._video_extensions.get(itag, 'flv')
1486
1487             video_format = '{0} - {1}{2}'.format(itag if itag else video_extension,
1488                                               self._video_dimensions.get(itag, '???'),
1489                                               ' ('+self._special_itags[itag]+')' if itag in self._special_itags else '')
1490
1491             results.append({
1492                 'id':       video_id,
1493                 'url':      video_real_url,
1494                 'uploader': video_uploader,
1495                 'uploader_id': video_uploader_id,
1496                 'upload_date':  upload_date,
1497                 'title':    video_title,
1498                 'ext':      video_extension,
1499                 'format':   video_format,
1500                 'format_id': itag,
1501                 'thumbnail':    video_thumbnail,
1502                 'description':  video_description,
1503                 'player_url':   player_url,
1504                 'subtitles':    video_subtitles,
1505                 'duration':     video_duration,
1506                 'age_limit':    18 if age_gate else 0,
1507                 'annotations':  video_annotations
1508             })
1509         return results
1510
1511 class YoutubePlaylistIE(InfoExtractor):
1512     IE_DESC = u'YouTube.com playlists'
1513     _VALID_URL = r"""(?:
1514                         (?:https?://)?
1515                         (?:\w+\.)?
1516                         youtube\.com/
1517                         (?:
1518                            (?:course|view_play_list|my_playlists|artist|playlist|watch)
1519                            \? (?:.*?&)*? (?:p|a|list)=
1520                         |  p/
1521                         )
1522                         ((?:PL|EC|UU|FL)?[0-9A-Za-z-_]{10,})
1523                         .*
1524                      |
1525                         ((?:PL|EC|UU|FL)[0-9A-Za-z-_]{10,})
1526                      )"""
1527     _TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/playlists/%s?max-results=%i&start-index=%i&v=2&alt=json&safeSearch=none'
1528     _MAX_RESULTS = 50
1529     IE_NAME = u'youtube:playlist'
1530
1531     @classmethod
1532     def suitable(cls, url):
1533         """Receives a URL and returns True if suitable for this IE."""
1534         return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
1535
1536     def _real_extract(self, url):
1537         # Extract playlist id
1538         mobj = re.match(self._VALID_URL, url, re.VERBOSE)
1539         if mobj is None:
1540             raise ExtractorError(u'Invalid URL: %s' % url)
1541         playlist_id = mobj.group(1) or mobj.group(2)
1542
1543         # Check if it's a video-specific URL
1544         query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
1545         if 'v' in query_dict:
1546             video_id = query_dict['v'][0]
1547             if self._downloader.params.get('noplaylist'):
1548                 self.to_screen(u'Downloading just video %s because of --no-playlist' % video_id)
1549                 return self.url_result('https://www.youtube.com/watch?v=' + video_id, 'Youtube')
1550             else:
1551                 self.to_screen(u'Downloading playlist PL%s - add --no-playlist to just download video %s' % (playlist_id, video_id))
1552
1553         # Download playlist videos from API
1554         videos = []
1555
1556         for page_num in itertools.count(1):
1557             start_index = self._MAX_RESULTS * (page_num - 1) + 1
1558             if start_index >= 1000:
1559                 self._downloader.report_warning(u'Max number of results reached')
1560                 break
1561             url = self._TEMPLATE_URL % (playlist_id, self._MAX_RESULTS, start_index)
1562             page = self._download_webpage(url, playlist_id, u'Downloading page #%s' % page_num)
1563
1564             try:
1565                 response = json.loads(page)
1566             except ValueError as err:
1567                 raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))
1568
1569             if 'feed' not in response:
1570                 raise ExtractorError(u'Got a malformed response from YouTube API')
1571             playlist_title = response['feed']['title']['$t']
1572             if 'entry' not in response['feed']:
1573                 # Number of videos is a multiple of self._MAX_RESULTS
1574                 break
1575
1576             for entry in response['feed']['entry']:
1577                 index = entry['yt$position']['$t']
1578                 if 'media$group' in entry and 'yt$videoid' in entry['media$group']:
1579                     videos.append((
1580                         index,
1581                         'https://www.youtube.com/watch?v=' + entry['media$group']['yt$videoid']['$t']
1582                     ))
1583
1584         videos = [v[1] for v in sorted(videos)]
1585
1586         url_results = [self.url_result(vurl, 'Youtube') for vurl in videos]
1587         return [self.playlist_result(url_results, playlist_id, playlist_title)]
1588
1589
1590 class YoutubeChannelIE(InfoExtractor):
1591     IE_DESC = u'YouTube.com channels'
1592     _VALID_URL = r"^(?:https?://)?(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/([0-9A-Za-z_-]+)"
1593     _TEMPLATE_URL = 'http://www.youtube.com/channel/%s/videos?sort=da&flow=list&view=0&page=%s&gl=US&hl=en'
1594     _MORE_PAGES_INDICATOR = 'yt-uix-load-more'
1595     _MORE_PAGES_URL = 'http://www.youtube.com/c4_browse_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s'
1596     IE_NAME = u'youtube:channel'
1597
1598     def extract_videos_from_page(self, page):
1599         ids_in_page = []
1600         for mobj in re.finditer(r'href="/watch\?v=([0-9A-Za-z_-]+)&?', page):
1601             if mobj.group(1) not in ids_in_page:
1602                 ids_in_page.append(mobj.group(1))
1603         return ids_in_page
1604
1605     def _real_extract(self, url):
1606         # Extract channel id
1607         mobj = re.match(self._VALID_URL, url)
1608         if mobj is None:
1609             raise ExtractorError(u'Invalid URL: %s' % url)
1610
1611         # Download channel page
1612         channel_id = mobj.group(1)
1613         video_ids = []
1614         pagenum = 1
1615
1616         url = self._TEMPLATE_URL % (channel_id, pagenum)
1617         page = self._download_webpage(url, channel_id,
1618                                       u'Downloading page #%s' % pagenum)
1619
1620         # Extract video identifiers
1621         ids_in_page = self.extract_videos_from_page(page)
1622         video_ids.extend(ids_in_page)
1623
1624         # Download any subsequent channel pages using the json-based channel_ajax query
1625         if self._MORE_PAGES_INDICATOR in page:
1626             for pagenum in itertools.count(1):
1627                 url = self._MORE_PAGES_URL % (pagenum, channel_id)
1628                 page = self._download_webpage(url, channel_id,
1629                                               u'Downloading page #%s' % pagenum)
1630
1631                 page = json.loads(page)
1632
1633                 ids_in_page = self.extract_videos_from_page(page['content_html'])
1634                 video_ids.extend(ids_in_page)
1635
1636                 if self._MORE_PAGES_INDICATOR  not in page['load_more_widget_html']:
1637                     break
1638
1639         self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))
1640
1641         urls = ['http://www.youtube.com/watch?v=%s' % id for id in video_ids]
1642         url_entries = [self.url_result(eurl, 'Youtube') for eurl in urls]
1643         return [self.playlist_result(url_entries, channel_id)]
1644
1645
1646 class YoutubeUserIE(InfoExtractor):
1647     IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)'
1648     _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)([A-Za-z0-9_-]+)'
1649     _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
1650     _GDATA_PAGE_SIZE = 50
1651     _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'
1652     IE_NAME = u'youtube:user'
1653
1654     @classmethod
1655     def suitable(cls, url):
1656         # Don't return True if the url can be extracted with other youtube
1657         # extractor, the regex would is too permissive and it would match.
1658         other_ies = iter(klass for (name, klass) in globals().items() if name.endswith('IE') and klass is not cls)
1659         if any(ie.suitable(url) for ie in other_ies): return False
1660         else: return super(YoutubeUserIE, cls).suitable(url)
1661
1662     def _real_extract(self, url):
1663         # Extract username
1664         mobj = re.match(self._VALID_URL, url)
1665         if mobj is None:
1666             raise ExtractorError(u'Invalid URL: %s' % url)
1667
1668         username = mobj.group(1)
1669
1670         # Download video ids using YouTube Data API. Result size per
1671         # query is limited (currently to 50 videos) so we need to query
1672         # page by page until there are no video ids - it means we got
1673         # all of them.
1674
1675         video_ids = []
1676
1677         for pagenum in itertools.count(0):
1678             start_index = pagenum * self._GDATA_PAGE_SIZE + 1
1679
1680             gdata_url = self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index)
1681             page = self._download_webpage(gdata_url, username,
1682                                           u'Downloading video ids from %d to %d' % (start_index, start_index + self._GDATA_PAGE_SIZE))
1683
1684             try:
1685                 response = json.loads(page)
1686             except ValueError as err:
1687                 raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))
1688             if 'entry' not in response['feed']:
1689                 # Number of videos is a multiple of self._MAX_RESULTS
1690                 break
1691
1692             # Extract video identifiers
1693             ids_in_page = []
1694             for entry in response['feed']['entry']:
1695                 ids_in_page.append(entry['id']['$t'].split('/')[-1])
1696             video_ids.extend(ids_in_page)
1697
1698             # A little optimization - if current page is not
1699             # "full", ie. does not contain PAGE_SIZE video ids then
1700             # we can assume that this page is the last one - there
1701             # are no more ids on further pages - no need to query
1702             # again.
1703
1704             if len(ids_in_page) < self._GDATA_PAGE_SIZE:
1705                 break
1706
1707         urls = ['http://www.youtube.com/watch?v=%s' % video_id for video_id in video_ids]
1708         url_results = [self.url_result(rurl, 'Youtube') for rurl in urls]
1709         return [self.playlist_result(url_results, playlist_title = username)]
1710
1711 class YoutubeSearchIE(SearchInfoExtractor):
1712     IE_DESC = u'YouTube.com searches'
1713     _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc'
1714     _MAX_RESULTS = 1000
1715     IE_NAME = u'youtube:search'
1716     _SEARCH_KEY = 'ytsearch'
1717
1718     def report_download_page(self, query, pagenum):
1719         """Report attempt to download search page with given number."""
1720         self._downloader.to_screen(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))
1721
1722     def _get_n_results(self, query, n):
1723         """Get a specified number of results for a query"""
1724
1725         video_ids = []
1726         pagenum = 0
1727         limit = n
1728
1729         while (50 * pagenum) < limit:
1730             self.report_download_page(query, pagenum+1)
1731             result_url = self._API_URL % (compat_urllib_parse.quote_plus(query), (50*pagenum)+1)
1732             request = compat_urllib_request.Request(result_url)
1733             try:
1734                 data = compat_urllib_request.urlopen(request).read().decode('utf-8')
1735             except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1736                 raise ExtractorError(u'Unable to download API page: %s' % compat_str(err))
1737             api_response = json.loads(data)['data']
1738
1739             if not 'items' in api_response:
1740                 raise ExtractorError(u'[youtube] No video results')
1741
1742             new_ids = list(video['id'] for video in api_response['items'])
1743             video_ids += new_ids
1744
1745             limit = min(n, api_response['totalItems'])
1746             pagenum += 1
1747
1748         if len(video_ids) > n:
1749             video_ids = video_ids[:n]
1750         videos = [self.url_result('http://www.youtube.com/watch?v=%s' % id, 'Youtube') for id in video_ids]
1751         return self.playlist_result(videos, query)
1752
1753
1754 class YoutubeShowIE(InfoExtractor):
1755     IE_DESC = u'YouTube.com (multi-season) shows'
1756     _VALID_URL = r'https?://www\.youtube\.com/show/(.*)'
1757     IE_NAME = u'youtube:show'
1758
1759     def _real_extract(self, url):
1760         mobj = re.match(self._VALID_URL, url)
1761         show_name = mobj.group(1)
1762         webpage = self._download_webpage(url, show_name, u'Downloading show webpage')
1763         # There's one playlist for each season of the show
1764         m_seasons = list(re.finditer(r'href="(/playlist\?list=.*?)"', webpage))
1765         self.to_screen(u'%s: Found %s seasons' % (show_name, len(m_seasons)))
1766         return [self.url_result('https://www.youtube.com' + season.group(1), 'YoutubePlaylist') for season in m_seasons]
1767
1768
1769 class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
1770     """
1771     Base class for extractors that fetch info from
1772     http://www.youtube.com/feed_ajax
1773     Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
1774     """
1775     _LOGIN_REQUIRED = True
1776     _PAGING_STEP = 30
1777     # use action_load_personal_feed instead of action_load_system_feed
1778     _PERSONAL_FEED = False
1779
1780     @property
1781     def _FEED_TEMPLATE(self):
1782         action = 'action_load_system_feed'
1783         if self._PERSONAL_FEED:
1784             action = 'action_load_personal_feed'
1785         return 'http://www.youtube.com/feed_ajax?%s=1&feed_name=%s&paging=%%s' % (action, self._FEED_NAME)
1786
1787     @property
1788     def IE_NAME(self):
1789         return u'youtube:%s' % self._FEED_NAME
1790
1791     def _real_initialize(self):
1792         self._login()
1793
1794     def _real_extract(self, url):
1795         feed_entries = []
1796         # The step argument is available only in 2.7 or higher
1797         for i in itertools.count(0):
1798             paging = i*self._PAGING_STEP
1799             info = self._download_webpage(self._FEED_TEMPLATE % paging,
1800                                           u'%s feed' % self._FEED_NAME,
1801                                           u'Downloading page %s' % i)
1802             info = json.loads(info)
1803             feed_html = info['feed_html']
1804             m_ids = re.finditer(r'"/watch\?v=(.*?)["&]', feed_html)
1805             ids = orderedSet(m.group(1) for m in m_ids)
1806             feed_entries.extend(self.url_result(id, 'Youtube') for id in ids)
1807             if info['paging'] is None:
1808                 break
1809         return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE)
1810
1811 class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
1812     IE_DESC = u'YouTube.com subscriptions feed, "ytsubs" keyword(requires authentication)'
1813     _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
1814     _FEED_NAME = 'subscriptions'
1815     _PLAYLIST_TITLE = u'Youtube Subscriptions'
1816
1817 class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
1818     IE_DESC = u'YouTube.com recommended videos, "ytrec" keyword (requires authentication)'
1819     _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?'
1820     _FEED_NAME = 'recommended'
1821     _PLAYLIST_TITLE = u'Youtube Recommended videos'
1822
1823 class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor):
1824     IE_DESC = u'Youtube watch later list, "ytwatchlater" keyword (requires authentication)'
1825     _VALID_URL = r'https?://www\.youtube\.com/feed/watch_later|:ytwatchlater'
1826     _FEED_NAME = 'watch_later'
1827     _PLAYLIST_TITLE = u'Youtube Watch Later'
1828     _PAGING_STEP = 100
1829     _PERSONAL_FEED = True
1830
1831 class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
1832     IE_NAME = u'youtube:favorites'
1833     IE_DESC = u'YouTube.com favourite videos, "ytfav" keyword (requires authentication)'
1834     _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
1835     _LOGIN_REQUIRED = True
1836
1837     def _real_extract(self, url):
1838         webpage = self._download_webpage('https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
1839         playlist_id = self._search_regex(r'list=(.+?)["&]', webpage, u'favourites playlist id')
1840         return self.url_result(playlist_id, 'YoutubePlaylist')
1841
1842
1843 class YoutubeTruncatedURLIE(InfoExtractor):
1844     IE_NAME = 'youtube:truncated_url'
1845     IE_DESC = False  # Do not list
1846     _VALID_URL = r'(?:https?://)?[^/]+/watch\?feature=[a-z_]+$'
1847
1848     def _real_extract(self, url):
1849         raise ExtractorError(
1850             u'Did you forget to quote the URL? Remember that & is a meta '
1851             u'character in most shells, so you want to put the URL in quotes, '
1852             u'like  youtube-dl '
1853             u'\'http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc\''
1854             u' (or simply  youtube-dl BaW_jenozKc  ).',
1855             expected=True)