[youtube] Extract like and dislike count (#1895)
[youtube-dl] / youtube_dl / extractor / youtube.py
1 # coding: utf-8
2
3 import collections
4 import errno
5 import io
6 import itertools
7 import json
8 import os.path
9 import re
10 import socket
11 import string
12 import struct
13 import traceback
14 import zlib
15
16 from .common import InfoExtractor, SearchInfoExtractor
17 from .subtitles import SubtitlesInfoExtractor
18 from ..utils import (
19     compat_chr,
20     compat_http_client,
21     compat_parse_qs,
22     compat_urllib_error,
23     compat_urllib_parse,
24     compat_urllib_request,
25     compat_urlparse,
26     compat_str,
27
28     clean_html,
29     get_cachedir,
30     get_element_by_id,
31     get_element_by_attribute,
32     ExtractorError,
33     unescapeHTML,
34     unified_strdate,
35     orderedSet,
36     write_json_file,
37 )
38
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _LANG_URL = r'https://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
    _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    def report_lang(self):
        """Report attempt to set language."""
        self.to_screen(u'Setting language')

    def _set_language(self):
        """Request the English-language page variant.

        Returns True on success, False (after a warning) on network failure.
        """
        request = compat_urllib_request.Request(self._LANG_URL)
        try:
            self.report_lang()
            compat_urllib_request.urlopen(request).read()
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self._downloader.report_warning(u'unable to set language: %s' % compat_str(err))
            return False
        return True

    def _login(self):
        """Log into YouTube with the configured credentials.

        Returns True if login succeeded, False when no credentials are
        configured or the attempt failed (a warning is emitted).  Raises
        ExtractorError when _LOGIN_REQUIRED is set but no login info is
        available.
        """
        (username, password) = self._get_login_info()
        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED:
                raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            return False

        request = compat_urllib_request.Request(self._LOGIN_URL)
        try:
            login_page = compat_urllib_request.urlopen(request).read().decode('utf-8')
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self._downloader.report_warning(u'unable to fetch login page: %s' % compat_str(err))
            return False

        # The GALX token from the login form must be echoed back in the POST.
        galx = self._search_regex(r'(?s)<input.+?name="GALX".+?value="(.+?)"',
                                  login_page, u'Login GALX parameter')

        # Log in
        login_form_strs = {
                u'continue': u'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
                u'Email': username,
                u'GALX': galx,
                u'Passwd': password,
                u'PersistentCookie': u'yes',
                u'_utf8': u'霱',
                u'bgresponse': u'js_disabled',
                u'checkConnection': u'',
                u'checkedDomains': u'youtube',
                u'dnConn': u'',
                u'pstMsg': u'0',
                u'rmShown': u'1',
                u'secTok': u'',
                u'signIn': u'Sign in',
                u'timeStmp': u'',
                u'service': u'youtube',
                u'uilel': u'3',
                u'hl': u'en_US',
        }
        # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
        # chokes on unicode
        login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k, v in login_form_strs.items())
        login_data = compat_urllib_parse.urlencode(login_form).encode('ascii')
        request = compat_urllib_request.Request(self._LOGIN_URL, login_data)
        try:
            self.report_login()
            login_results = compat_urllib_request.urlopen(request).read().decode('utf-8')
            # On a failed login Google serves the login form again.
            if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
                self._downloader.report_warning(u'unable to log in: bad username or password')
                return False
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self._downloader.report_warning(u'unable to log in: %s' % compat_str(err))
            return False
        return True

    def _confirm_age(self):
        """Confirm the age gate.  Raises ExtractorError on network failure."""
        age_form = {
                'next_url':     '/',
                'action_confirm':   'Confirm',
                }
        # POST data must be bytes on Python 3 (urlencode returns str there);
        # encode explicitly, matching the handling of login_data in _login.
        request = compat_urllib_request.Request(
            self._AGE_URL, compat_urllib_parse.urlencode(age_form).encode('ascii'))
        try:
            self.report_age_confirmation()
            compat_urllib_request.urlopen(request).read().decode('utf-8')
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            raise ExtractorError(u'Unable to confirm age: %s' % compat_str(err))
        return True

    def _real_initialize(self):
        """Set language, log in and confirm age before extraction starts."""
        if self._downloader is None:
            return
        if not self._set_language():
            return
        if not self._login():
            return
        self._confirm_age()
139
140 class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
141     IE_DESC = u'YouTube.com'
142     _VALID_URL = r"""(?x)^
143                      (
144                          (?:https?://|//)?                                    # http(s):// or protocol-independent URL (optional)
145                          (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|
146                             tube\.majestyc\.net/|
147                             youtube\.googleapis\.com/)                        # the various hostnames, with wildcard subdomains
148                          (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
149                          (?:                                                  # the various things that can precede the ID:
150                              (?:(?:v|embed|e)/)                               # v/ or embed/ or e/
151                              |(?:                                             # or the v= param in all its forms
152                                  (?:(?:watch|movie)(?:_popup)?(?:\.php)?)?    # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
153                                  (?:\?|\#!?)                                  # the params delimiter ? or # or #!
154                                  (?:.*?&)?                                    # any other preceding param (like /?s=tuff&v=xxxx)
155                                  v=
156                              )
157                          ))
158                          |youtu\.be/                                          # just youtu.be/xxxx
159                          )
160                      )?                                                       # all until now is optional -> you can pass the naked ID
161                      ([0-9A-Za-z_-]{11})                                      # here is it! the YouTube video ID
162                      (?(1).+)?                                                # if we found the ID, everything can follow
163                      $"""
164     _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
165     # Listed in order of quality
166     _available_formats = ['38', '37', '46', '22', '45', '35', '44', '34', '18', '43', '6', '5', '36', '17', '13',
167                           # Apple HTTP Live Streaming
168                           '96', '95', '94', '93', '92', '132', '151',
169                           # 3D
170                           '85', '84', '102', '83', '101', '82', '100',
171                           # Dash video
172                           '138', '137', '248', '136', '247', '135', '246',
173                           '245', '244', '134', '243', '133', '242', '160',
174                           # Dash audio
175                           '141', '172', '140', '171', '139',
176                           ]
177     _available_formats_prefer_free = ['38', '46', '37', '45', '22', '44', '35', '43', '34', '18', '6', '5', '36', '17', '13',
178                                       # Apple HTTP Live Streaming
179                                       '96', '95', '94', '93', '92', '132', '151',
180                                       # 3D
181                                       '85', '102', '84', '101', '83', '100', '82',
182                                       # Dash video
183                                       '138', '248', '137', '247', '136', '246', '245',
184                                       '244', '135', '243', '134', '242', '133', '160',
185                                       # Dash audio
186                                       '172', '141', '171', '140', '139',
187                                       ]
188     _video_formats_map = {
189         'flv': ['35', '34', '6', '5'],
190         '3gp': ['36', '17', '13'],
191         'mp4': ['38', '37', '22', '18'],
192         'webm': ['46', '45', '44', '43'],
193     }
194     _video_extensions = {
195         '13': '3gp',
196         '17': '3gp',
197         '18': 'mp4',
198         '22': 'mp4',
199         '36': '3gp',
200         '37': 'mp4',
201         '38': 'mp4',
202         '43': 'webm',
203         '44': 'webm',
204         '45': 'webm',
205         '46': 'webm',
206
207         # 3d videos
208         '82': 'mp4',
209         '83': 'mp4',
210         '84': 'mp4',
211         '85': 'mp4',
212         '100': 'webm',
213         '101': 'webm',
214         '102': 'webm',
215
216         # Apple HTTP Live Streaming
217         '92': 'mp4',
218         '93': 'mp4',
219         '94': 'mp4',
220         '95': 'mp4',
221         '96': 'mp4',
222         '132': 'mp4',
223         '151': 'mp4',
224
225         # Dash mp4
226         '133': 'mp4',
227         '134': 'mp4',
228         '135': 'mp4',
229         '136': 'mp4',
230         '137': 'mp4',
231         '138': 'mp4',
232         '160': 'mp4',
233
234         # Dash mp4 audio
235         '139': 'm4a',
236         '140': 'm4a',
237         '141': 'm4a',
238
239         # Dash webm
240         '171': 'webm',
241         '172': 'webm',
242         '242': 'webm',
243         '243': 'webm',
244         '244': 'webm',
245         '245': 'webm',
246         '246': 'webm',
247         '247': 'webm',
248         '248': 'webm',
249     }
250     _video_dimensions = {
251         '5': '400x240',
252         '6': '???',
253         '13': '???',
254         '17': '176x144',
255         '18': '640x360',
256         '22': '1280x720',
257         '34': '640x360',
258         '35': '854x480',
259         '36': '320x240',
260         '37': '1920x1080',
261         '38': '4096x3072',
262         '43': '640x360',
263         '44': '854x480',
264         '45': '1280x720',
265         '46': '1920x1080',
266         '82': '360p',
267         '83': '480p',
268         '84': '720p',
269         '85': '1080p',
270         '92': '240p',
271         '93': '360p',
272         '94': '480p',
273         '95': '720p',
274         '96': '1080p',
275         '100': '360p',
276         '101': '480p',
277         '102': '720p',
278         '132': '240p',
279         '151': '72p',
280         '133': '240p',
281         '134': '360p',
282         '135': '480p',
283         '136': '720p',
284         '137': '1080p',
285         '138': '>1080p',
286         '139': '48k',
287         '140': '128k',
288         '141': '256k',
289         '160': '192p',
290         '171': '128k',
291         '172': '256k',
292         '242': '240p',
293         '243': '360p',
294         '244': '480p',
295         '245': '480p',
296         '246': '480p',
297         '247': '720p',
298         '248': '1080p',
299     }
300     _special_itags = {
301         '82': '3D',
302         '83': '3D',
303         '84': '3D',
304         '85': '3D',
305         '100': '3D',
306         '101': '3D',
307         '102': '3D',
308         '133': 'DASH Video',
309         '134': 'DASH Video',
310         '135': 'DASH Video',
311         '136': 'DASH Video',
312         '137': 'DASH Video',
313         '138': 'DASH Video',
314         '139': 'DASH Audio',
315         '140': 'DASH Audio',
316         '141': 'DASH Audio',
317         '160': 'DASH Video',
318         '171': 'DASH Audio',
319         '172': 'DASH Audio',
320         '242': 'DASH Video',
321         '243': 'DASH Video',
322         '244': 'DASH Video',
323         '245': 'DASH Video',
324         '246': 'DASH Video',
325         '247': 'DASH Video',
326         '248': 'DASH Video',
327     }
328
329     IE_NAME = u'youtube'
330     _TESTS = [
331         {
332             u"url":  u"http://www.youtube.com/watch?v=BaW_jenozKc",
333             u"file":  u"BaW_jenozKc.mp4",
334             u"info_dict": {
335                 u"title": u"youtube-dl test video \"'/\\ä↭𝕐",
336                 u"uploader": u"Philipp Hagemeister",
337                 u"uploader_id": u"phihag",
338                 u"upload_date": u"20121002",
339                 u"description": u"test chars:  \"'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de ."
340             }
341         },
342         {
343             u"url":  u"http://www.youtube.com/watch?v=UxxajLWwzqY",
344             u"file":  u"UxxajLWwzqY.mp4",
345             u"note": u"Test generic use_cipher_signature video (#897)",
346             u"info_dict": {
347                 u"upload_date": u"20120506",
348                 u"title": u"Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]",
349                 u"description": u"md5:5b292926389560516e384ac437c0ec07",
350                 u"uploader": u"Icona Pop",
351                 u"uploader_id": u"IconaPop"
352             }
353         },
354         {
355             u"url":  u"https://www.youtube.com/watch?v=07FYdnEawAQ",
356             u"file":  u"07FYdnEawAQ.mp4",
357             u"note": u"Test VEVO video with age protection (#956)",
358             u"info_dict": {
359                 u"upload_date": u"20130703",
360                 u"title": u"Justin Timberlake - Tunnel Vision (Explicit)",
361                 u"description": u"md5:64249768eec3bc4276236606ea996373",
362                 u"uploader": u"justintimberlakeVEVO",
363                 u"uploader_id": u"justintimberlakeVEVO"
364             }
365         },
366         {
367             u"url":  u"//www.YouTube.com/watch?v=yZIXLfi8CZQ",
368             u"file":  u"yZIXLfi8CZQ.mp4",
369             u"note": u"Embed-only video (#1746)",
370             u"info_dict": {
371                 u"upload_date": u"20120608",
372                 u"title": u"Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012",
373                 u"description": u"md5:09b78bd971f1e3e289601dfba15ca4f7",
374                 u"uploader": u"SET India",
375                 u"uploader_id": u"setindia"
376             }
377         },
378     ]
379
380
381     @classmethod
382     def suitable(cls, url):
383         """Receives a URL and returns True if suitable for this IE."""
384         if YoutubePlaylistIE.suitable(url): return False
385         return re.match(cls._VALID_URL, url) is not None
386
    def __init__(self, *args, **kwargs):
        """Initialize the extractor and its per-run player cache."""
        super(YoutubeIE, self).__init__(*args, **kwargs)
        # Per-instance cache keyed by player; filled lazily during extraction.
        self._player_cache = {}
390
391     def report_video_info_webpage_download(self, video_id):
392         """Report attempt to download video info webpage."""
393         self.to_screen(u'%s: Downloading video info webpage' % video_id)
394
395     def report_information_extraction(self, video_id):
396         """Report attempt to extract video information."""
397         self.to_screen(u'%s: Extracting video information' % video_id)
398
399     def report_unavailable_format(self, video_id, format):
400         """Report extracted video URL."""
401         self.to_screen(u'%s: Format %s not available' % (video_id, format))
402
403     def report_rtmp_download(self):
404         """Indicate the download will use the RTMP protocol."""
405         self.to_screen(u'RTMP download detected')
406
    def _extract_signature_function(self, video_id, player_url, slen):
        """Build the signature-deciphering function for a given player.

        The player URL is expected to end in ``-<id>.<ext>``; the extension
        selects the parser (JavaScript or SWF).  Results are cached on disk,
        keyed by player type, player id and signature length, so each player
        file is downloaded and parsed at most once.  Returns a callable that
        maps a scrambled signature string of length slen to its deciphered
        form.
        """
        id_m = re.match(r'.*-(?P<id>[a-zA-Z0-9_-]+)\.(?P<ext>[a-z]+)$',
                        player_url)
        player_type = id_m.group('ext')
        player_id = id_m.group('id')

        # Read from filesystem cache
        func_id = '%s_%s_%d' % (player_type, player_id, slen)
        # func_id becomes a filename component; ensure it cannot contain a
        # path separator and escape the cache directory.
        assert os.path.basename(func_id) == func_id
        cache_dir = get_cachedir(self._downloader.params)

        cache_enabled = cache_dir is not None
        if cache_enabled:
            cache_fn = os.path.join(os.path.expanduser(cache_dir),
                                    u'youtube-sigfuncs',
                                    func_id + '.json')
            try:
                with io.open(cache_fn, 'r', encoding='utf-8') as cachef:
                    cache_spec = json.load(cachef)
                # The cached spec lists, for every output position, the input
                # index to take — i.e. the permutation the function performs.
                return lambda s: u''.join(s[i] for i in cache_spec)
            except IOError:
                pass  # No cache available

        if player_type == 'js':
            code = self._download_webpage(
                player_url, video_id,
                note=u'Downloading %s player %s' % (player_type, player_id),
                errnote=u'Download of %s failed' % player_url)
            res = self._parse_sig_js(code)
        elif player_type == 'swf':
            urlh = self._request_webpage(
                player_url, video_id,
                note=u'Downloading %s player %s' % (player_type, player_id),
                errnote=u'Download of %s failed' % player_url)
            code = urlh.read()
            res = self._parse_sig_swf(code)
        else:
            assert False, 'Invalid player type %r' % player_type

        if cache_enabled:
            try:
                # Probe the freshly-built function with a known string to
                # record the index permutation it performs, then persist it.
                test_string = u''.join(map(compat_chr, range(slen)))
                cache_res = res(test_string)
                cache_spec = [ord(c) for c in cache_res]
                try:
                    os.makedirs(os.path.dirname(cache_fn))
                except OSError as ose:
                    # The directory may already exist; anything else is fatal.
                    if ose.errno != errno.EEXIST:
                        raise
                write_json_file(cache_spec, cache_fn)
            except Exception:
                # Caching is best-effort: extraction already succeeded, so
                # only warn on failure.
                tb = traceback.format_exc()
                self._downloader.report_warning(
                    u'Writing cache to %r failed: %s' % (cache_fn, tb))

        return res
463
464     def _print_sig_code(self, func, slen):
465         def gen_sig_code(idxs):
466             def _genslice(start, end, step):
467                 starts = u'' if start == 0 else str(start)
468                 ends = (u':%d' % (end+step)) if end + step >= 0 else u':'
469                 steps = u'' if step == 1 else (u':%d' % step)
470                 return u's[%s%s%s]' % (starts, ends, steps)
471
472             step = None
473             start = '(Never used)'  # Quelch pyflakes warnings - start will be
474                                     # set as soon as step is set
475             for i, prev in zip(idxs[1:], idxs[:-1]):
476                 if step is not None:
477                     if i - prev == step:
478                         continue
479                     yield _genslice(start, prev, step)
480                     step = None
481                     continue
482                 if i - prev in [-1, 1]:
483                     step = i - prev
484                     start = prev
485                     continue
486                 else:
487                     yield u's[%d]' % prev
488             if step is None:
489                 yield u's[%d]' % i
490             else:
491                 yield _genslice(start, i, step)
492
493         test_string = u''.join(map(compat_chr, range(slen)))
494         cache_res = func(test_string)
495         cache_spec = [ord(c) for c in cache_res]
496         expr_code = u' + '.join(gen_sig_code(cache_spec))
497         code = u'if len(s) == %d:\n    return %s\n' % (slen, expr_code)
498         self.to_screen(u'Extracted signature function:\n' + code)
499
500     def _parse_sig_js(self, jscode):
501         funcname = self._search_regex(
502             r'signature=([a-zA-Z]+)', jscode,
503             u'Initial JS player signature function name')
504
505         functions = {}
506
507         def argidx(varname):
508             return string.lowercase.index(varname)
509
510         def interpret_statement(stmt, local_vars, allow_recursion=20):
511             if allow_recursion < 0:
512                 raise ExtractorError(u'Recursion limit reached')
513
514             if stmt.startswith(u'var '):
515                 stmt = stmt[len(u'var '):]
516             ass_m = re.match(r'^(?P<out>[a-z]+)(?:\[(?P<index>[^\]]+)\])?' +
517                              r'=(?P<expr>.*)$', stmt)
518             if ass_m:
519                 if ass_m.groupdict().get('index'):
520                     def assign(val):
521                         lvar = local_vars[ass_m.group('out')]
522                         idx = interpret_expression(ass_m.group('index'),
523                                                    local_vars, allow_recursion)
524                         assert isinstance(idx, int)
525                         lvar[idx] = val
526                         return val
527                     expr = ass_m.group('expr')
528                 else:
529                     def assign(val):
530                         local_vars[ass_m.group('out')] = val
531                         return val
532                     expr = ass_m.group('expr')
533             elif stmt.startswith(u'return '):
534                 assign = lambda v: v
535                 expr = stmt[len(u'return '):]
536             else:
537                 raise ExtractorError(
538                     u'Cannot determine left side of statement in %r' % stmt)
539
540             v = interpret_expression(expr, local_vars, allow_recursion)
541             return assign(v)
542
543         def interpret_expression(expr, local_vars, allow_recursion):
544             if expr.isdigit():
545                 return int(expr)
546
547             if expr.isalpha():
548                 return local_vars[expr]
549
550             m = re.match(r'^(?P<in>[a-z]+)\.(?P<member>.*)$', expr)
551             if m:
552                 member = m.group('member')
553                 val = local_vars[m.group('in')]
554                 if member == 'split("")':
555                     return list(val)
556                 if member == 'join("")':
557                     return u''.join(val)
558                 if member == 'length':
559                     return len(val)
560                 if member == 'reverse()':
561                     return val[::-1]
562                 slice_m = re.match(r'slice\((?P<idx>.*)\)', member)
563                 if slice_m:
564                     idx = interpret_expression(
565                         slice_m.group('idx'), local_vars, allow_recursion-1)
566                     return val[idx:]
567
568             m = re.match(
569                 r'^(?P<in>[a-z]+)\[(?P<idx>.+)\]$', expr)
570             if m:
571                 val = local_vars[m.group('in')]
572                 idx = interpret_expression(m.group('idx'), local_vars,
573                                            allow_recursion-1)
574                 return val[idx]
575
576             m = re.match(r'^(?P<a>.+?)(?P<op>[%])(?P<b>.+?)$', expr)
577             if m:
578                 a = interpret_expression(m.group('a'),
579                                          local_vars, allow_recursion)
580                 b = interpret_expression(m.group('b'),
581                                          local_vars, allow_recursion)
582                 return a % b
583
584             m = re.match(
585                 r'^(?P<func>[a-zA-Z]+)\((?P<args>[a-z0-9,]+)\)$', expr)
586             if m:
587                 fname = m.group('func')
588                 if fname not in functions:
589                     functions[fname] = extract_function(fname)
590                 argvals = [int(v) if v.isdigit() else local_vars[v]
591                            for v in m.group('args').split(',')]
592                 return functions[fname](argvals)
593             raise ExtractorError(u'Unsupported JS expression %r' % expr)
594
595         def extract_function(funcname):
596             func_m = re.search(
597                 r'function ' + re.escape(funcname) +
598                 r'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}',
599                 jscode)
600             argnames = func_m.group('args').split(',')
601
602             def resf(args):
603                 local_vars = dict(zip(argnames, args))
604                 for stmt in func_m.group('code').split(';'):
605                     res = interpret_statement(stmt, local_vars)
606                 return res
607             return resf
608
609         initial_function = extract_function(funcname)
610         return lambda s: initial_function([s])
611
612     def _parse_sig_swf(self, file_contents):
613         if file_contents[1:3] != b'WS':
614             raise ExtractorError(
615                 u'Not an SWF file; header is %r' % file_contents[:3])
616         if file_contents[:1] == b'C':
617             content = zlib.decompress(file_contents[8:])
618         else:
619             raise NotImplementedError(u'Unsupported compression format %r' %
620                                       file_contents[:1])
621
622         def extract_tags(content):
623             pos = 0
624             while pos < len(content):
625                 header16 = struct.unpack('<H', content[pos:pos+2])[0]
626                 pos += 2
627                 tag_code = header16 >> 6
628                 tag_len = header16 & 0x3f
629                 if tag_len == 0x3f:
630                     tag_len = struct.unpack('<I', content[pos:pos+4])[0]
631                     pos += 4
632                 assert pos+tag_len <= len(content)
633                 yield (tag_code, content[pos:pos+tag_len])
634                 pos += tag_len
635
636         code_tag = next(tag
637                         for tag_code, tag in extract_tags(content)
638                         if tag_code == 82)
639         p = code_tag.index(b'\0', 4) + 1
640         code_reader = io.BytesIO(code_tag[p:])
641
642         # Parse ABC (AVM2 ByteCode)
643         def read_int(reader=None):
644             if reader is None:
645                 reader = code_reader
646             res = 0
647             shift = 0
648             for _ in range(5):
649                 buf = reader.read(1)
650                 assert len(buf) == 1
651                 b = struct.unpack('<B', buf)[0]
652                 res = res | ((b & 0x7f) << shift)
653                 if b & 0x80 == 0:
654                     break
655                 shift += 7
656             return res
657
658         def u30(reader=None):
659             res = read_int(reader)
660             assert res & 0xf0000000 == 0
661             return res
662         u32 = read_int
663
664         def s32(reader=None):
665             v = read_int(reader)
666             if v & 0x80000000 != 0:
667                 v = - ((v ^ 0xffffffff) + 1)
668             return v
669
670         def read_string(reader=None):
671             if reader is None:
672                 reader = code_reader
673             slen = u30(reader)
674             resb = reader.read(slen)
675             assert len(resb) == slen
676             return resb.decode('utf-8')
677
678         def read_bytes(count, reader=None):
679             if reader is None:
680                 reader = code_reader
681             resb = reader.read(count)
682             assert len(resb) == count
683             return resb
684
685         def read_byte(reader=None):
686             resb = read_bytes(1, reader=reader)
687             res = struct.unpack('<B', resb)[0]
688             return res
689
690         # minor_version + major_version
691         read_bytes(2 + 2)
692
693         # Constant pool
694         int_count = u30()
695         for _c in range(1, int_count):
696             s32()
697         uint_count = u30()
698         for _c in range(1, uint_count):
699             u32()
700         double_count = u30()
701         read_bytes((double_count-1) * 8)
702         string_count = u30()
703         constant_strings = [u'']
704         for _c in range(1, string_count):
705             s = read_string()
706             constant_strings.append(s)
707         namespace_count = u30()
708         for _c in range(1, namespace_count):
709             read_bytes(1)  # kind
710             u30()  # name
711         ns_set_count = u30()
712         for _c in range(1, ns_set_count):
713             count = u30()
714             for _c2 in range(count):
715                 u30()
716         multiname_count = u30()
717         MULTINAME_SIZES = {
718             0x07: 2,  # QName
719             0x0d: 2,  # QNameA
720             0x0f: 1,  # RTQName
721             0x10: 1,  # RTQNameA
722             0x11: 0,  # RTQNameL
723             0x12: 0,  # RTQNameLA
724             0x09: 2,  # Multiname
725             0x0e: 2,  # MultinameA
726             0x1b: 1,  # MultinameL
727             0x1c: 1,  # MultinameLA
728         }
729         multinames = [u'']
730         for _c in range(1, multiname_count):
731             kind = u30()
732             assert kind in MULTINAME_SIZES, u'Invalid multiname kind %r' % kind
733             if kind == 0x07:
734                 u30()  # namespace_idx
735                 name_idx = u30()
736                 multinames.append(constant_strings[name_idx])
737             else:
738                 multinames.append('[MULTINAME kind: %d]' % kind)
739                 for _c2 in range(MULTINAME_SIZES[kind]):
740                     u30()
741
742         # Methods
743         method_count = u30()
744         MethodInfo = collections.namedtuple(
745             'MethodInfo',
746             ['NEED_ARGUMENTS', 'NEED_REST'])
747         method_infos = []
748         for method_id in range(method_count):
749             param_count = u30()
750             u30()  # return type
751             for _ in range(param_count):
752                 u30()  # param type
753             u30()  # name index (always 0 for youtube)
754             flags = read_byte()
755             if flags & 0x08 != 0:
756                 # Options present
757                 option_count = u30()
758                 for c in range(option_count):
759                     u30()  # val
760                     read_bytes(1)  # kind
761             if flags & 0x80 != 0:
762                 # Param names present
763                 for _ in range(param_count):
764                     u30()  # param name
765             mi = MethodInfo(flags & 0x01 != 0, flags & 0x04 != 0)
766             method_infos.append(mi)
767
768         # Metadata
769         metadata_count = u30()
770         for _c in range(metadata_count):
771             u30()  # name
772             item_count = u30()
773             for _c2 in range(item_count):
774                 u30()  # key
775                 u30()  # value
776
777         def parse_traits_info():
778             trait_name_idx = u30()
779             kind_full = read_byte()
780             kind = kind_full & 0x0f
781             attrs = kind_full >> 4
782             methods = {}
783             if kind in [0x00, 0x06]:  # Slot or Const
784                 u30()  # Slot id
785                 u30()  # type_name_idx
786                 vindex = u30()
787                 if vindex != 0:
788                     read_byte()  # vkind
789             elif kind in [0x01, 0x02, 0x03]:  # Method / Getter / Setter
790                 u30()  # disp_id
791                 method_idx = u30()
792                 methods[multinames[trait_name_idx]] = method_idx
793             elif kind == 0x04:  # Class
794                 u30()  # slot_id
795                 u30()  # classi
796             elif kind == 0x05:  # Function
797                 u30()  # slot_id
798                 function_idx = u30()
799                 methods[function_idx] = multinames[trait_name_idx]
800             else:
801                 raise ExtractorError(u'Unsupported trait kind %d' % kind)
802
803             if attrs & 0x4 != 0:  # Metadata present
804                 metadata_count = u30()
805                 for _c3 in range(metadata_count):
806                     u30()  # metadata index
807
808             return methods
809
810         # Classes
811         TARGET_CLASSNAME = u'SignatureDecipher'
812         searched_idx = multinames.index(TARGET_CLASSNAME)
813         searched_class_id = None
814         class_count = u30()
815         for class_id in range(class_count):
816             name_idx = u30()
817             if name_idx == searched_idx:
818                 # We found the class we're looking for!
819                 searched_class_id = class_id
820             u30()  # super_name idx
821             flags = read_byte()
822             if flags & 0x08 != 0:  # Protected namespace is present
823                 u30()  # protected_ns_idx
824             intrf_count = u30()
825             for _c2 in range(intrf_count):
826                 u30()
827             u30()  # iinit
828             trait_count = u30()
829             for _c2 in range(trait_count):
830                 parse_traits_info()
831
832         if searched_class_id is None:
833             raise ExtractorError(u'Target class %r not found' %
834                                  TARGET_CLASSNAME)
835
836         method_names = {}
837         method_idxs = {}
838         for class_id in range(class_count):
839             u30()  # cinit
840             trait_count = u30()
841             for _c2 in range(trait_count):
842                 trait_methods = parse_traits_info()
843                 if class_id == searched_class_id:
844                     method_names.update(trait_methods.items())
845                     method_idxs.update(dict(
846                         (idx, name)
847                         for name, idx in trait_methods.items()))
848
849         # Scripts
850         script_count = u30()
851         for _c in range(script_count):
852             u30()  # init
853             trait_count = u30()
854             for _c2 in range(trait_count):
855                 parse_traits_info()
856
857         # Method bodies
858         method_body_count = u30()
859         Method = collections.namedtuple('Method', ['code', 'local_count'])
860         methods = {}
861         for _c in range(method_body_count):
862             method_idx = u30()
863             u30()  # max_stack
864             local_count = u30()
865             u30()  # init_scope_depth
866             u30()  # max_scope_depth
867             code_length = u30()
868             code = read_bytes(code_length)
869             if method_idx in method_idxs:
870                 m = Method(code, local_count)
871                 methods[method_idxs[method_idx]] = m
872             exception_count = u30()
873             for _c2 in range(exception_count):
874                 u30()  # from
875                 u30()  # to
876                 u30()  # target
877                 u30()  # exc_type
878                 u30()  # var_name
879             trait_count = u30()
880             for _c2 in range(trait_count):
881                 parse_traits_info()
882
883         assert p + code_reader.tell() == len(code_tag)
884         assert len(methods) == len(method_idxs)
885
886         method_pyfunctions = {}
887
888         def extract_function(func_name):
889             if func_name in method_pyfunctions:
890                 return method_pyfunctions[func_name]
891             if func_name not in methods:
892                 raise ExtractorError(u'Cannot find function %r' % func_name)
893             m = methods[func_name]
894
895             def resfunc(args):
896                 registers = ['(this)'] + list(args) + [None] * m.local_count
897                 stack = []
898                 coder = io.BytesIO(m.code)
899                 while True:
900                     opcode = struct.unpack('!B', coder.read(1))[0]
901                     if opcode == 36:  # pushbyte
902                         v = struct.unpack('!B', coder.read(1))[0]
903                         stack.append(v)
904                     elif opcode == 44:  # pushstring
905                         idx = u30(coder)
906                         stack.append(constant_strings[idx])
907                     elif opcode == 48:  # pushscope
908                         # We don't implement the scope register, so we'll just
909                         # ignore the popped value
910                         stack.pop()
911                     elif opcode == 70:  # callproperty
912                         index = u30(coder)
913                         mname = multinames[index]
914                         arg_count = u30(coder)
915                         args = list(reversed(
916                             [stack.pop() for _ in range(arg_count)]))
917                         obj = stack.pop()
918                         if mname == u'split':
919                             assert len(args) == 1
920                             assert isinstance(args[0], compat_str)
921                             assert isinstance(obj, compat_str)
922                             if args[0] == u'':
923                                 res = list(obj)
924                             else:
925                                 res = obj.split(args[0])
926                             stack.append(res)
927                         elif mname == u'slice':
928                             assert len(args) == 1
929                             assert isinstance(args[0], int)
930                             assert isinstance(obj, list)
931                             res = obj[args[0]:]
932                             stack.append(res)
933                         elif mname == u'join':
934                             assert len(args) == 1
935                             assert isinstance(args[0], compat_str)
936                             assert isinstance(obj, list)
937                             res = args[0].join(obj)
938                             stack.append(res)
939                         elif mname in method_pyfunctions:
940                             stack.append(method_pyfunctions[mname](args))
941                         else:
942                             raise NotImplementedError(
943                                 u'Unsupported property %r on %r'
944                                 % (mname, obj))
945                     elif opcode == 72:  # returnvalue
946                         res = stack.pop()
947                         return res
948                     elif opcode == 79:  # callpropvoid
949                         index = u30(coder)
950                         mname = multinames[index]
951                         arg_count = u30(coder)
952                         args = list(reversed(
953                             [stack.pop() for _ in range(arg_count)]))
954                         obj = stack.pop()
955                         if mname == u'reverse':
956                             assert isinstance(obj, list)
957                             obj.reverse()
958                         else:
959                             raise NotImplementedError(
960                                 u'Unsupported (void) property %r on %r'
961                                 % (mname, obj))
962                     elif opcode == 93:  # findpropstrict
963                         index = u30(coder)
964                         mname = multinames[index]
965                         res = extract_function(mname)
966                         stack.append(res)
967                     elif opcode == 97:  # setproperty
968                         index = u30(coder)
969                         value = stack.pop()
970                         idx = stack.pop()
971                         obj = stack.pop()
972                         assert isinstance(obj, list)
973                         assert isinstance(idx, int)
974                         obj[idx] = value
975                     elif opcode == 98:  # getlocal
976                         index = u30(coder)
977                         stack.append(registers[index])
978                     elif opcode == 99:  # setlocal
979                         index = u30(coder)
980                         value = stack.pop()
981                         registers[index] = value
982                     elif opcode == 102:  # getproperty
983                         index = u30(coder)
984                         pname = multinames[index]
985                         if pname == u'length':
986                             obj = stack.pop()
987                             assert isinstance(obj, list)
988                             stack.append(len(obj))
989                         else:  # Assume attribute access
990                             idx = stack.pop()
991                             assert isinstance(idx, int)
992                             obj = stack.pop()
993                             assert isinstance(obj, list)
994                             stack.append(obj[idx])
995                     elif opcode == 128:  # coerce
996                         u30(coder)
997                     elif opcode == 133:  # coerce_s
998                         assert isinstance(stack[-1], (type(None), compat_str))
999                     elif opcode == 164:  # modulo
1000                         value2 = stack.pop()
1001                         value1 = stack.pop()
1002                         res = value1 % value2
1003                         stack.append(res)
1004                     elif opcode == 208:  # getlocal_0
1005                         stack.append(registers[0])
1006                     elif opcode == 209:  # getlocal_1
1007                         stack.append(registers[1])
1008                     elif opcode == 210:  # getlocal_2
1009                         stack.append(registers[2])
1010                     elif opcode == 211:  # getlocal_3
1011                         stack.append(registers[3])
1012                     elif opcode == 214:  # setlocal_2
1013                         registers[2] = stack.pop()
1014                     elif opcode == 215:  # setlocal_3
1015                         registers[3] = stack.pop()
1016                     else:
1017                         raise NotImplementedError(
1018                             u'Unsupported opcode %d' % opcode)
1019
1020             method_pyfunctions[func_name] = resfunc
1021             return resfunc
1022
1023         initial_function = extract_function(u'decipher')
1024         return lambda s: initial_function([s])
1025
1026     def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
1027         """Turn the encrypted s field into a working signature"""
1028
1029         if player_url is not None:
1030             if player_url.startswith(u'//'):
1031                 player_url = u'https:' + player_url
1032             try:
1033                 player_id = (player_url, len(s))
1034                 if player_id not in self._player_cache:
1035                     func = self._extract_signature_function(
1036                         video_id, player_url, len(s)
1037                     )
1038                     self._player_cache[player_id] = func
1039                 func = self._player_cache[player_id]
1040                 if self._downloader.params.get('youtube_print_sig_code'):
1041                     self._print_sig_code(func, len(s))
1042                 return func(s)
1043             except Exception:
1044                 tb = traceback.format_exc()
1045                 self._downloader.report_warning(
1046                     u'Automatic signature extraction failed: ' + tb)
1047
1048             self._downloader.report_warning(
1049                 u'Warning: Falling back to static signature algorithm')
1050
1051         return self._static_decrypt_signature(
1052             s, video_id, player_url, age_gate)
1053
1054     def _static_decrypt_signature(self, s, video_id, player_url, age_gate):
1055         if age_gate:
1056             # The videos with age protection use another player, so the
1057             # algorithms can be different.
1058             if len(s) == 86:
1059                 return s[2:63] + s[82] + s[64:82] + s[63]
1060
1061         if len(s) == 93:
1062             return s[86:29:-1] + s[88] + s[28:5:-1]
1063         elif len(s) == 92:
1064             return s[25] + s[3:25] + s[0] + s[26:42] + s[79] + s[43:79] + s[91] + s[80:83]
1065         elif len(s) == 91:
1066             return s[84:27:-1] + s[86] + s[26:5:-1]
1067         elif len(s) == 90:
1068             return s[25] + s[3:25] + s[2] + s[26:40] + s[77] + s[41:77] + s[89] + s[78:81]
1069         elif len(s) == 89:
1070             return s[84:78:-1] + s[87] + s[77:60:-1] + s[0] + s[59:3:-1]
1071         elif len(s) == 88:
1072             return s[7:28] + s[87] + s[29:45] + s[55] + s[46:55] + s[2] + s[56:87] + s[28]
1073         elif len(s) == 87:
1074             return s[6:27] + s[4] + s[28:39] + s[27] + s[40:59] + s[2] + s[60:]
1075         elif len(s) == 86:
1076             return s[80:72:-1] + s[16] + s[71:39:-1] + s[72] + s[38:16:-1] + s[82] + s[15::-1]
1077         elif len(s) == 85:
1078             return s[3:11] + s[0] + s[12:55] + s[84] + s[56:84]
1079         elif len(s) == 84:
1080             return s[78:70:-1] + s[14] + s[69:37:-1] + s[70] + s[36:14:-1] + s[80] + s[:14][::-1]
1081         elif len(s) == 83:
1082             return s[80:63:-1] + s[0] + s[62:0:-1] + s[63]
1083         elif len(s) == 82:
1084             return s[80:37:-1] + s[7] + s[36:7:-1] + s[0] + s[6:0:-1] + s[37]
1085         elif len(s) == 81:
1086             return s[56] + s[79:56:-1] + s[41] + s[55:41:-1] + s[80] + s[40:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
1087         elif len(s) == 80:
1088             return s[1:19] + s[0] + s[20:68] + s[19] + s[69:80]
1089         elif len(s) == 79:
1090             return s[54] + s[77:54:-1] + s[39] + s[53:39:-1] + s[78] + s[38:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
1091
1092         else:
1093             raise ExtractorError(u'Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s)))
1094
1095     def _get_available_subtitles(self, video_id, webpage):
1096         try:
1097             sub_list = self._download_webpage(
1098                 'http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
1099                 video_id, note=False)
1100         except ExtractorError as err:
1101             self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err))
1102             return {}
1103         lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list)
1104
1105         sub_lang_list = {}
1106         for l in lang_list:
1107             lang = l[1]
1108             params = compat_urllib_parse.urlencode({
1109                 'lang': lang,
1110                 'v': video_id,
1111                 'fmt': self._downloader.params.get('subtitlesformat', 'srt'),
1112                 'name': l[0].encode('utf-8'),
1113             })
1114             url = u'http://www.youtube.com/api/timedtext?' + params
1115             sub_lang_list[lang] = url
1116         if not sub_lang_list:
1117             self._downloader.report_warning(u'video doesn\'t have subtitles')
1118             return {}
1119         return sub_lang_list
1120
    def _get_available_automatic_caption(self, video_id, webpage):
        """We need the webpage for getting the captions url, pass it as an
           argument to speed up the process."""
        sub_format = self._downloader.params.get('subtitlesformat', 'srt')
        self.to_screen(u'%s: Looking for automatic captions' % video_id)
        # The caption base url and timestamp live in the inline player config JSON.
        mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
        err_msg = u'Couldn\'t find automatic captions for %s' % video_id
        if mobj is None:
            self._downloader.report_warning(err_msg)
            return {}
        player_config = json.loads(mobj.group(1))
        try:
            args = player_config[u'args']
            caption_url = args[u'ttsurl']
            timestamp = args[u'timestamp']
            # We get the available subtitles
            list_params = compat_urllib_parse.urlencode({
                'type': 'list',
                'tlangs': 1,
                'asrs': 1,
            })
            list_url = caption_url + '&' + list_params
            caption_list = self._download_xml(list_url, video_id)
            # The first <track> must be the automatic (ASR) one; its language is
            # the source language for every translated caption below.
            original_lang_node = caption_list.find('track')
            if original_lang_node is None or original_lang_node.attrib.get('kind') != 'asr' :
                self._downloader.report_warning(u'Video doesn\'t have automatic captions')
                return {}
            original_lang = original_lang_node.attrib['lang_code']

            sub_lang_list = {}
            # Each <target> is a language the ASR track can be translated into.
            for lang_node in caption_list.findall('target'):
                sub_lang = lang_node.attrib['lang_code']
                params = compat_urllib_parse.urlencode({
                    'lang': original_lang,
                    'tlang': sub_lang,
                    'fmt': sub_format,
                    'ts': timestamp,
                    'kind': 'asr',
                })
                sub_lang_list[sub_lang] = caption_url + '&' + params
            return sub_lang_list
        # An extractor error can be raise by the download process if there are
        # no automatic captions but there are subtitles
        except (KeyError, ExtractorError):
            self._downloader.report_warning(err_msg)
            return {}
1167
1168     def _print_formats(self, formats):
1169         print('Available formats:')
1170         for x in formats:
1171             print('%s\t:\t%s\t[%s]%s' %(x, self._video_extensions.get(x, 'flv'),
1172                                         self._video_dimensions.get(x, '???'),
1173                                         ' ('+self._special_itags[x]+')' if x in self._special_itags else ''))
1174
1175     def _extract_id(self, url):
1176         mobj = re.match(self._VALID_URL, url, re.VERBOSE)
1177         if mobj is None:
1178             raise ExtractorError(u'Invalid URL: %s' % url)
1179         video_id = mobj.group(2)
1180         return video_id
1181
    def _get_video_url_list(self, url_map):
        """
        Transform a dictionary in the format {itag:url} to a list of (itag, url)
        with the requested formats.

        Returns None after printing when the listformats option is set;
        raises ExtractorError when nothing matches.
        """
        req_format = self._downloader.params.get('format', None)
        format_limit = self._downloader.params.get('format_limit', None)
        # Pick the quality ordering; prefer_free_formats swaps in the
        # free-format-first ranking.
        available_formats = self._available_formats_prefer_free if self._downloader.params.get('prefer_free_formats', False) else self._available_formats
        if format_limit is not None and format_limit in available_formats:
            # Drop every format ranked ahead of the requested limit.
            format_list = available_formats[available_formats.index(format_limit):]
        else:
            format_list = available_formats
        # Keep only formats the video actually offers, in ranked order.
        existing_formats = [x for x in format_list if x in url_map]
        if len(existing_formats) == 0:
            raise ExtractorError(u'no known formats available for video')
        if self._downloader.params.get('listformats', None):
            self._print_formats(existing_formats)
            return
        if req_format is None or req_format == 'best':
            video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
        elif req_format == 'worst':
            video_url_list = [(existing_formats[-1], url_map[existing_formats[-1]])] # worst quality
        elif req_format in ('-1', 'all'):
            video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
        else:
            # Specific formats. We pick the first in a slash-delimeted sequence.
            # Format can be specified as itag or 'mp4' or 'flv' etc. We pick the highest quality
            # available in the specified format. For example,
            # if '1/2/3/4' is requested and '2' and '4' are available, we pick '2'.
            # if '1/mp4/3/4' is requested and '1' and '5' (is a mp4) are available, we pick '1'.
            # if '1/mp4/3/4' is requested and '4' and '5' (is a mp4) are available, we pick '5'.
            req_formats = req_format.split('/')
            video_url_list = None
            for rf in req_formats:
                if rf in url_map:
                    # Exact itag match.
                    video_url_list = [(rf, url_map[rf])]
                    break
                if rf in self._video_formats_map:
                    # rf names a container (e.g. 'mp4'); try its itags in order.
                    for srf in self._video_formats_map[rf]:
                        if srf in url_map:
                            video_url_list = [(srf, url_map[srf])]
                            break
                    else:
                        # No itag of this container available; try the next
                        # requested format.
                        continue
                    # Inner loop broke, i.e. a format was found: stop searching.
                    break
            if video_url_list is None:
                raise ExtractorError(u'requested format not available')
        return video_url_list
1230
1231     def _extract_from_m3u8(self, manifest_url, video_id):
1232         url_map = {}
1233         def _get_urls(_manifest):
1234             lines = _manifest.split('\n')
1235             urls = filter(lambda l: l and not l.startswith('#'),
1236                             lines)
1237             return urls
1238         manifest = self._download_webpage(manifest_url, video_id, u'Downloading formats manifest')
1239         formats_urls = _get_urls(manifest)
1240         for format_url in formats_urls:
1241             itag = self._search_regex(r'itag/(\d+?)/', format_url, 'itag')
1242             url_map[itag] = format_url
1243         return url_map
1244
1245     def _extract_annotations(self, video_id):
1246         url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id
1247         return self._download_webpage(url, video_id, note=u'Searching for annotations.', errnote=u'Unable to download video annotations.')
1248
1249     def _real_extract(self, url):
1250         # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1251         mobj = re.search(self._NEXT_URL_RE, url)
1252         if mobj:
1253             url = 'https://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/')
1254         video_id = self._extract_id(url)
1255
1256         # Get video webpage
1257         url = 'https://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
1258         video_webpage = self._download_webpage(url, video_id)
1259
1260         # Attempt to extract SWF player URL
1261         mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
1262         if mobj is not None:
1263             player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1264         else:
1265             player_url = None
1266
1267         # Get video info
1268         self.report_video_info_webpage_download(video_id)
1269         if re.search(r'player-age-gate-content">', video_webpage) is not None:
1270             self.report_age_confirmation()
1271             age_gate = True
1272             # We simulate the access to the video from www.youtube.com/v/{video_id}
1273             # this can be viewed without login into Youtube
1274             data = compat_urllib_parse.urlencode({'video_id': video_id,
1275                                                   'el': 'player_embedded',
1276                                                   'gl': 'US',
1277                                                   'hl': 'en',
1278                                                   'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1279                                                   'asv': 3,
1280                                                   'sts':'1588',
1281                                                   })
1282             video_info_url = 'https://www.youtube.com/get_video_info?' + data
1283             video_info_webpage = self._download_webpage(video_info_url, video_id,
1284                                     note=False,
1285                                     errnote='unable to download video info webpage')
1286             video_info = compat_parse_qs(video_info_webpage)
1287         else:
1288             age_gate = False
1289             for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
1290                 video_info_url = ('https://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
1291                         % (video_id, el_type))
1292                 video_info_webpage = self._download_webpage(video_info_url, video_id,
1293                                         note=False,
1294                                         errnote='unable to download video info webpage')
1295                 video_info = compat_parse_qs(video_info_webpage)
1296                 if 'token' in video_info:
1297                     break
1298         if 'token' not in video_info:
1299             if 'reason' in video_info:
1300                 raise ExtractorError(u'YouTube said: %s' % video_info['reason'][0], expected=True)
1301             else:
1302                 raise ExtractorError(u'"token" parameter not in video info for unknown reason')
1303
1304         if 'view_count' in video_info:
1305             view_count = int(video_info['view_count'][0])
1306         else:
1307             view_count = None
1308
1309         # Check for "rental" videos
1310         if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
1311             raise ExtractorError(u'"rental" videos not supported')
1312
1313         # Start extracting information
1314         self.report_information_extraction(video_id)
1315
1316         # uploader
1317         if 'author' not in video_info:
1318             raise ExtractorError(u'Unable to extract uploader name')
1319         video_uploader = compat_urllib_parse.unquote_plus(video_info['author'][0])
1320
1321         # uploader_id
1322         video_uploader_id = None
1323         mobj = re.search(r'<link itemprop="url" href="http://www.youtube.com/(?:user|channel)/([^"]+)">', video_webpage)
1324         if mobj is not None:
1325             video_uploader_id = mobj.group(1)
1326         else:
1327             self._downloader.report_warning(u'unable to extract uploader nickname')
1328
1329         # title
1330         if 'title' in video_info:
1331             video_title = compat_urllib_parse.unquote_plus(video_info['title'][0])
1332         else:
1333             self._downloader.report_warning(u'Unable to extract video title')
1334             video_title = u'_'
1335
1336         # thumbnail image
1337         # We try first to get a high quality image:
1338         m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
1339                             video_webpage, re.DOTALL)
1340         if m_thumb is not None:
1341             video_thumbnail = m_thumb.group(1)
1342         elif 'thumbnail_url' not in video_info:
1343             self._downloader.report_warning(u'unable to extract video thumbnail')
1344             video_thumbnail = None
1345         else:   # don't panic if we can't find it
1346             video_thumbnail = compat_urllib_parse.unquote_plus(video_info['thumbnail_url'][0])
1347
1348         # upload date
1349         upload_date = None
1350         mobj = re.search(r'id="eow-date.*?>(.*?)</span>', video_webpage, re.DOTALL)
1351         if mobj is not None:
1352             upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
1353             upload_date = unified_strdate(upload_date)
1354
1355         # description
1356         video_description = get_element_by_id("eow-description", video_webpage)
1357         if video_description:
1358             video_description = re.sub(r'''(?x)
1359                 <a\s+
1360                     (?:[a-zA-Z-]+="[^"]+"\s+)*?
1361                     title="([^"]+)"\s+
1362                     (?:[a-zA-Z-]+="[^"]+"\s+)*?
1363                     class="yt-uix-redirect-link"\s*>
1364                 [^<]+
1365                 </a>
1366             ''', r'\1', video_description)
1367             video_description = clean_html(video_description)
1368         else:
1369             fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
1370             if fd_mobj:
1371                 video_description = unescapeHTML(fd_mobj.group(1))
1372             else:
1373                 video_description = u''
1374
1375         def _extract_count(klass):
1376             count = self._search_regex(r'class="%s">([\d,]+)</span>' % re.escape(klass), video_webpage, klass, fatal=False)
1377             if count is not None:
1378                 return int(count.replace(',', ''))
1379             return None
1380         like_count = _extract_count(u'likes-count')
1381         dislike_count = _extract_count(u'dislikes-count')
1382
1383         # subtitles
1384         video_subtitles = self.extract_subtitles(video_id, video_webpage)
1385
1386         if self._downloader.params.get('listsubtitles', False):
1387             self._list_available_subtitles(video_id, video_webpage)
1388             return
1389
1390         if 'length_seconds' not in video_info:
1391             self._downloader.report_warning(u'unable to extract video duration')
1392             video_duration = ''
1393         else:
1394             video_duration = compat_urllib_parse.unquote_plus(video_info['length_seconds'][0])
1395
1396         # annotations
1397         video_annotations = None
1398         if self._downloader.params.get('writeannotations', False):
1399                 video_annotations = self._extract_annotations(video_id)
1400
1401         # Decide which formats to download
1402
1403         try:
1404             mobj = re.search(r';ytplayer.config = ({.*?});', video_webpage)
1405             if not mobj:
1406                 raise ValueError('Could not find vevo ID')
1407             info = json.loads(mobj.group(1))
1408             args = info['args']
1409             # Easy way to know if the 's' value is in url_encoded_fmt_stream_map
1410             # this signatures are encrypted
1411             if 'url_encoded_fmt_stream_map' not in args:
1412                 raise ValueError(u'No stream_map present')  # caught below
1413             re_signature = re.compile(r'[&,]s=')
1414             m_s = re_signature.search(args['url_encoded_fmt_stream_map'])
1415             if m_s is not None:
1416                 self.to_screen(u'%s: Encrypted signatures detected.' % video_id)
1417                 video_info['url_encoded_fmt_stream_map'] = [args['url_encoded_fmt_stream_map']]
1418             m_s = re_signature.search(args.get('adaptive_fmts', u''))
1419             if m_s is not None:
1420                 if 'adaptive_fmts' in video_info:
1421                     video_info['adaptive_fmts'][0] += ',' + args['adaptive_fmts']
1422                 else:
1423                     video_info['adaptive_fmts'] = [args['adaptive_fmts']]
1424         except ValueError:
1425             pass
1426
1427         if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1428             self.report_rtmp_download()
1429             video_url_list = [(None, video_info['conn'][0])]
1430         elif len(video_info.get('url_encoded_fmt_stream_map', [])) >= 1 or len(video_info.get('adaptive_fmts', [])) >= 1:
1431             encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts',[''])[0]
1432             if 'rtmpe%3Dyes' in encoded_url_map:
1433                 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
1434             url_map = {}
1435             for url_data_str in encoded_url_map.split(','):
1436                 url_data = compat_parse_qs(url_data_str)
1437                 if 'itag' in url_data and 'url' in url_data:
1438                     url = url_data['url'][0]
1439                     if 'sig' in url_data:
1440                         url += '&signature=' + url_data['sig'][0]
1441                     elif 's' in url_data:
1442                         encrypted_sig = url_data['s'][0]
1443                         if self._downloader.params.get('verbose'):
1444                             if age_gate:
1445                                 if player_url is None:
1446                                     player_version = 'unknown'
1447                                 else:
1448                                     player_version = self._search_regex(
1449                                         r'-(.+)\.swf$', player_url,
1450                                         u'flash player', fatal=False)
1451                                 player_desc = 'flash player %s' % player_version
1452                             else:
1453                                 player_version = self._search_regex(
1454                                     r'html5player-(.+?)\.js', video_webpage,
1455                                     'html5 player', fatal=False)
1456                                 player_desc = u'html5 player %s' % player_version
1457
1458                             parts_sizes = u'.'.join(compat_str(len(part)) for part in encrypted_sig.split('.'))
1459                             self.to_screen(u'encrypted signature length %d (%s), itag %s, %s' %
1460                                 (len(encrypted_sig), parts_sizes, url_data['itag'][0], player_desc))
1461
1462                         if not age_gate:
1463                             jsplayer_url_json = self._search_regex(
1464                                 r'"assets":.+?"js":\s*("[^"]+")',
1465                                 video_webpage, u'JS player URL')
1466                             player_url = json.loads(jsplayer_url_json)
1467
1468                         signature = self._decrypt_signature(
1469                             encrypted_sig, video_id, player_url, age_gate)
1470                         url += '&signature=' + signature
1471                     if 'ratebypass' not in url:
1472                         url += '&ratebypass=yes'
1473                     url_map[url_data['itag'][0]] = url
1474             video_url_list = self._get_video_url_list(url_map)
1475             if not video_url_list:
1476                 return
1477         elif video_info.get('hlsvp'):
1478             manifest_url = video_info['hlsvp'][0]
1479             url_map = self._extract_from_m3u8(manifest_url, video_id)
1480             video_url_list = self._get_video_url_list(url_map)
1481             if not video_url_list:
1482                 return
1483
1484         else:
1485             raise ExtractorError(u'no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
1486
1487         results = []
1488         for itag, video_real_url in video_url_list:
1489             # Extension
1490             video_extension = self._video_extensions.get(itag, 'flv')
1491
1492             video_format = '{0} - {1}{2}'.format(itag if itag else video_extension,
1493                                               self._video_dimensions.get(itag, '???'),
1494                                               ' ('+self._special_itags[itag]+')' if itag in self._special_itags else '')
1495
1496             results.append({
1497                 'id':       video_id,
1498                 'url':      video_real_url,
1499                 'uploader': video_uploader,
1500                 'uploader_id': video_uploader_id,
1501                 'upload_date':  upload_date,
1502                 'title':    video_title,
1503                 'ext':      video_extension,
1504                 'format':   video_format,
1505                 'format_id': itag,
1506                 'thumbnail':    video_thumbnail,
1507                 'description':  video_description,
1508                 'player_url':   player_url,
1509                 'subtitles':    video_subtitles,
1510                 'duration':     video_duration,
1511                 'age_limit':    18 if age_gate else 0,
1512                 'annotations':  video_annotations,
1513                 'webpage_url': 'https://www.youtube.com/watch?v=%s' % video_id,
1514                 'view_count': view_count,
1515                 'like_count': like_count,
1516                 'dislike_count': dislike_count,
1517             })
1518         return results
1519
class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
    IE_DESC = u'YouTube.com playlists'
    _VALID_URL = r"""(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        youtube\.com/
                        (?:
                           (?:course|view_play_list|my_playlists|artist|playlist|watch)
                           \? (?:.*?&)*? (?:p|a|list)=
                        |  p/
                        )
                        ((?:PL|EC|UU|FL)?[0-9A-Za-z-_]{10,})
                        .*
                     |
                        ((?:PL|EC|UU|FL)[0-9A-Za-z-_]{10,})
                     )"""
    _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s&page=%s'
    _MORE_PAGES_INDICATOR = r'data-link-type="next"'
    _VIDEO_RE = r'href="/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&amp;[^"]*?index=(?P<index>\d+)'
    IE_NAME = u'youtube:playlist'

    @classmethod
    def suitable(cls, url):
        """Receives a URL and returns True if suitable for this IE."""
        # _VALID_URL is written for re.VERBOSE, so the default suitable()
        # (which matches without that flag) cannot be reused here.
        return re.match(cls._VALID_URL, url, re.VERBOSE) is not None

    def _real_initialize(self):
        self._login()

    def _ids_to_results(self, ids):
        """Map a list of video ids to url_result dicts handled by YoutubeIE."""
        return [self.url_result(vid_id, 'Youtube', video_id=vid_id)
                       for vid_id in ids]

    def _extract_mix(self, playlist_id):
        """Extract an auto-generated mix playlist.

        Mixes have no playlist page of their own; the video ids are scraped
        from the watch page of the seed video.
        """
        # The mixes are generated from a single video
        # the id of the playlist is just 'RD' + video_id
        url = 'https://youtube.com/watch?v=%s&list=%s' % (playlist_id[2:], playlist_id)
        webpage = self._download_webpage(url, playlist_id, u'Downloading Youtube mix')
        title_span = (get_element_by_attribute('class', 'title long-title', webpage) or
            get_element_by_attribute('class', 'title ', webpage))
        title = clean_html(title_span)
        video_re = r'data-index="\d+".*?href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s' % re.escape(playlist_id)
        ids = orderedSet(re.findall(video_re, webpage))
        url_results = self._ids_to_results(ids)

        return self.playlist_result(url_results, playlist_id, title)

    def _real_extract(self, url):
        # Extract playlist id
        mobj = re.match(self._VALID_URL, url, re.VERBOSE)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        playlist_id = mobj.group(1) or mobj.group(2)

        # Check if it's a video-specific URL
        query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
        if 'v' in query_dict:
            video_id = query_dict['v'][0]
            if self._downloader.params.get('noplaylist'):
                self.to_screen(u'Downloading just video %s because of --no-playlist' % video_id)
                return self.url_result(video_id, 'Youtube', video_id=video_id)
            else:
                # Fix: the message used to hard-code a 'PL' prefix in front of
                # playlist_id, which already carries its own PL/EC/UU/FL prefix
                # (yielding e.g. "playlist PLPLxxxx").
                self.to_screen(u'Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))

        if len(playlist_id) == 13:  # 'RD' + 11 characters for the video id
            # Mixes require a custom extraction process
            return self._extract_mix(playlist_id)

        # Extract the video ids from the playlist pages
        ids = []

        for page_num in itertools.count(1):
            url = self._TEMPLATE_URL % (playlist_id, page_num)
            page = self._download_webpage(url, playlist_id, u'Downloading page #%s' % page_num)
            matches = re.finditer(self._VIDEO_RE, page)
            # We remove the duplicates and the link with index 0
            # (it's not the first video of the playlist)
            new_ids = orderedSet(m.group('id') for m in matches if m.group('index') != '0')
            ids.extend(new_ids)

            if re.search(self._MORE_PAGES_INDICATOR, page) is None:
                break

        playlist_title = self._og_search_title(page)

        url_results = self._ids_to_results(ids)
        return self.playlist_result(url_results, playlist_id, playlist_title)
1607
1608
class YoutubeChannelIE(InfoExtractor):
    IE_DESC = u'YouTube.com channels'
    _VALID_URL = r"^(?:https?://)?(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/([0-9A-Za-z_-]+)"
    _MORE_PAGES_INDICATOR = 'yt-uix-load-more'
    _MORE_PAGES_URL = 'http://www.youtube.com/c4_browse_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s'
    IE_NAME = u'youtube:channel'

    def extract_videos_from_page(self, page):
        """Return the video ids linked from an HTML page, in order of first
        appearance and without duplicates."""
        ids_in_page = []
        for mobj in re.finditer(r'href="/watch\?v=([0-9A-Za-z_-]+)&?', page):
            if mobj.group(1) not in ids_in_page:
                ids_in_page.append(mobj.group(1))
        return ids_in_page

    def _real_extract(self, url):
        # Extract channel id
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        # Download channel page
        channel_id = mobj.group(1)
        video_ids = []
        url = 'https://www.youtube.com/channel/%s/videos' % channel_id
        channel_page = self._download_webpage(url, channel_id)
        # Collapsed the former 4-line if/else into a direct boolean expression.
        autogenerated = re.search(r'channel-header-autogenerated-label', channel_page) is not None

        if autogenerated:
            # The videos are contained in a single page
            # the ajax pages can't be used, they are empty
            video_ids = self.extract_videos_from_page(channel_page)
        else:
            # Download all channel pages using the json-based channel_ajax query
            for pagenum in itertools.count(1):
                url = self._MORE_PAGES_URL % (pagenum, channel_id)
                page = self._download_webpage(url, channel_id,
                                              u'Downloading page #%s' % pagenum)

                page = json.loads(page)

                ids_in_page = self.extract_videos_from_page(page['content_html'])
                video_ids.extend(ids_in_page)

                # The widget HTML stops carrying the load-more marker on the
                # last page.
                if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']:
                    break

        self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))

        url_entries = [self.url_result(video_id, 'Youtube', video_id=video_id)
                       for video_id in video_ids]
        return self.playlist_result(url_entries, channel_id)
1663
1664
class YoutubeUserIE(InfoExtractor):
    IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)([A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
    _GDATA_PAGE_SIZE = 50
    _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'
    IE_NAME = u'youtube:user'

    @classmethod
    def suitable(cls, url):
        # Don't return True if the url can be extracted with other youtube
        # extractors: the regex is too permissive and it would match.
        other_ies = iter(klass for (name, klass) in globals().items() if name.endswith('IE') and klass is not cls)
        if any(ie.suitable(url) for ie in other_ies):
            return False
        return super(YoutubeUserIE, cls).suitable(url)

    def _real_extract(self, url):
        # Extract username
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        username = mobj.group(1)

        # Download video ids using YouTube Data API. Result size per
        # query is limited (currently to 50 videos) so we need to query
        # page by page until there are no video ids - it means we got
        # all of them.

        video_ids = []

        for pagenum in itertools.count(0):
            start_index = pagenum * self._GDATA_PAGE_SIZE + 1

            gdata_url = self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index)
            page = self._download_webpage(gdata_url, username,
                                          u'Downloading video ids from %d to %d' % (start_index, start_index + self._GDATA_PAGE_SIZE))

            try:
                response = json.loads(page)
            except ValueError as err:
                raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))
            if 'entry' not in response['feed']:
                # Number of videos is a multiple of self._MAX_RESULTS
                break

            # Extract video identifiers (the id entry is a URL; the video id
            # is its last path component).
            ids_in_page = [entry['id']['$t'].split('/')[-1]
                           for entry in response['feed']['entry']]
            video_ids.extend(ids_in_page)

            # A little optimization - if current page is not
            # "full", ie. does not contain PAGE_SIZE video ids then
            # we can assume that this page is the last one - there
            # are no more ids on further pages - no need to query
            # again.

            if len(ids_in_page) < self._GDATA_PAGE_SIZE:
                break

        url_results = [
            self.url_result(video_id, 'Youtube', video_id=video_id)
            for video_id in video_ids]
        return self.playlist_result(url_results, playlist_title=username)
1730
1731
class YoutubeSearchIE(SearchInfoExtractor):
    IE_DESC = u'YouTube.com searches'
    _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc'
    _MAX_RESULTS = 1000
    IE_NAME = u'youtube:search'
    _SEARCH_KEY = 'ytsearch'

    def report_download_page(self, query, pagenum):
        """Report attempt to download search page with given number."""
        self._downloader.to_screen(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""

        video_ids = []
        pagenum = 0
        limit = n

        # The API serves at most 50 results per page; keep paging until we
        # have collected `limit` ids (limit is clamped to totalItems below).
        while (50 * pagenum) < limit:
            self.report_download_page(query, pagenum+1)
            result_url = self._API_URL % (compat_urllib_parse.quote_plus(query), (50*pagenum)+1)
            request = compat_urllib_request.Request(result_url)
            try:
                data = compat_urllib_request.urlopen(request).read().decode('utf-8')
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                raise ExtractorError(u'Unable to download API page: %s' % compat_str(err))
            api_response = json.loads(data)['data']

            # PEP 8: `x not in y`, not `not x in y`.
            if 'items' not in api_response:
                raise ExtractorError(u'[youtube] No video results')

            # A comprehension is the idiomatic spelling of list(generator).
            new_ids = [video['id'] for video in api_response['items']]
            video_ids += new_ids

            limit = min(n, api_response['totalItems'])
            pagenum += 1

        if len(video_ids) > n:
            video_ids = video_ids[:n]
        videos = [self.url_result(video_id, 'Youtube', video_id=video_id)
                  for video_id in video_ids]
        return self.playlist_result(videos, query)
1774
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Variant of YoutubeSearchIE that returns the newest uploads first
    (the API URL adds orderby=published)."""
    IE_DESC = u'YouTube.com searches, newest videos first'
    _SEARCH_KEY = 'ytsearchdate'
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc&orderby=published'
1780
class YoutubeShowIE(InfoExtractor):
    IE_DESC = u'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://www\.youtube\.com/show/(.*)'
    IE_NAME = u'youtube:show'

    def _real_extract(self, url):
        """Return one playlist url_result per season found on the show page."""
        show_name = re.match(self._VALID_URL, url).group(1)
        webpage = self._download_webpage(url, show_name, u'Downloading show webpage')
        # Every season of the show is published as a separate playlist.
        season_matches = list(re.finditer(r'href="(/playlist\?list=.*?)"', webpage))
        self.to_screen(u'%s: Found %s seasons' % (show_name, len(season_matches)))
        return [
            self.url_result('https://www.youtube.com' + season.group(1), 'YoutubePlaylist')
            for season in season_matches
        ]
1794
1795
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Base class for extractors that fetch info from
    http://www.youtube.com/feed_ajax
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
    """
    _LOGIN_REQUIRED = True
    # use action_load_personal_feed instead of action_load_system_feed
    _PERSONAL_FEED = False

    @property
    def _FEED_TEMPLATE(self):
        # Template URL with one remaining %s slot for the paging token.
        action = 'action_load_personal_feed' if self._PERSONAL_FEED else 'action_load_system_feed'
        return 'http://www.youtube.com/feed_ajax?%s=1&feed_name=%s&paging=%%s' % (action, self._FEED_NAME)

    @property
    def IE_NAME(self):
        return u'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        self._login()

    def _real_extract(self, url):
        """Walk the paged feed and collect a playlist of video url_results."""
        entries = []
        paging = 0
        for page_num in itertools.count(1):
            raw = self._download_webpage(self._FEED_TEMPLATE % paging,
                                         u'%s feed' % self._FEED_NAME,
                                         u'Downloading page %s' % page_num)
            feed = json.loads(raw)
            id_matches = re.finditer(r'"/watch\?v=(.*?)["&]', feed['feed_html'])
            for video_id in orderedSet(m.group(1) for m in id_matches):
                entries.append(self.url_result(video_id, 'Youtube', video_id=video_id))
            # A None paging token marks the last page.
            if feed['paging'] is None:
                break
            paging = feed['paging']
        return self.playlist_result(entries, playlist_title=self._PLAYLIST_TITLE)
1838
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed of the logged-in user's channel subscriptions."""
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = u'Youtube Subscriptions'
    _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
    IE_DESC = u'YouTube.com subscriptions feed, "ytsubs" keyword(requires authentication)'
1844
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed of videos YouTube recommends to the logged-in user."""
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = u'Youtube Recommended videos'
    _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?'
    IE_DESC = u'YouTube.com recommended videos, "ytrec" keyword (requires authentication)'
1850
class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor):
    """The logged-in user's Watch Later list (a personal feed)."""
    _PERSONAL_FEED = True
    _FEED_NAME = 'watch_later'
    _PLAYLIST_TITLE = u'Youtube Watch Later'
    _VALID_URL = r'https?://www\.youtube\.com/feed/watch_later|:ytwatchlater'
    IE_DESC = u'Youtube watch later list, "ytwatchlater" keyword (requires authentication)'
1857
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """The logged-in user's watch history (a personal feed)."""
    IE_DESC = u'Youtube watch history, "ythistory" keyword (requires authentication)'
    # Fix: use a raw string like every other _VALID_URL in this file. The
    # previous u'...\.' form only matched by accident, because \. happens not
    # to be a recognized escape sequence.
    _VALID_URL = r'https?://www\.youtube\.com/feed/history|:ythistory'
    _FEED_NAME = 'history'
    _PERSONAL_FEED = True
    _PLAYLIST_TITLE = u'Youtube Watch History'
1864
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    IE_NAME = u'youtube:favorites'
    IE_DESC = u'YouTube.com favourite videos, "ytfav" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
    _LOGIN_REQUIRED = True

    def _real_extract(self, url):
        """Resolve the favourites page to its backing playlist id and hand it
        to YoutubePlaylistIE."""
        favourites_url = 'https://www.youtube.com/my_favorites'
        webpage = self._download_webpage(favourites_url, 'Youtube Favourites videos')
        playlist_id = self._search_regex(r'list=(.+?)["&]', webpage, u'favourites playlist id')
        return self.url_result(playlist_id, 'YoutubePlaylist')
1875
1876
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catches watch URLs whose v= parameter was swallowed by an unquoted
    shell '&' and raises a helpful error instead of failing cryptically."""
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'(?:https?://)?[^/]+/watch\?feature=[a-z_]+$'

    def _real_extract(self, url):
        message = (
            u'Did you forget to quote the URL? Remember that & is a meta '
            u'character in most shells, so you want to put the URL in quotes, '
            u'like  youtube-dl '
            u'\'http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc\''
            u' (or simply  youtube-dl BaW_jenozKc  ).')
        raise ExtractorError(message, expected=True)