git.bitcoin.ninja Git - youtube-dl/blob - youtube_dl/extractor/youtube.py

   1 # coding: utf-8
   2
   3 import collections
   4 import errno
   5 import io
   6 import itertools
   7 import json
   8 import os.path
   9 import re
  10 import struct
  11 import traceback
  12 import zlib
  13
  14 from .common import InfoExtractor, SearchInfoExtractor
  15 from .subtitles import SubtitlesInfoExtractor
  16 from ..jsinterp import JSInterpreter
  17 from ..utils import (
  18     compat_chr,
  19     compat_parse_qs,
  20     compat_urllib_parse,
  21     compat_urllib_request,
  22     compat_urlparse,
  23     compat_str,
  24
  25     clean_html,
  26     get_cachedir,
  27     get_element_by_id,
  28     get_element_by_attribute,
  29     ExtractorError,
  30     int_or_none,
  31     PagedList,
  32     unescapeHTML,
  33     unified_strdate,
  34     orderedSet,
  35     write_json_file,
  36     uppercase_escape,
  37 )
  38
  39 class YoutubeBaseInfoExtractor(InfoExtractor):
  40     """Provide base functions for Youtube extractors"""
  41     _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
  42     _LANG_URL = r'https://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
  43     _AGE_URL = 'https://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
  44     _NETRC_MACHINE = 'youtube'
  45     # If True it will raise an error if no login info is provided
  46     _LOGIN_REQUIRED = False
  47
  48     def _set_language(self):
  49         return bool(self._download_webpage(
  50             self._LANG_URL, None,
  51             note=u'Setting language', errnote='unable to set language',
  52             fatal=False))
  53
  54     def _login(self):
  55         (username, password) = self._get_login_info()
  56         # No authentication to be performed
  57         if username is None:
  58             if self._LOGIN_REQUIRED:
  59                 raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True)
  60             return False
  61
  62         login_page = self._download_webpage(
  63             self._LOGIN_URL, None,
  64             note=u'Downloading login page',
  65             errnote=u'unable to fetch login page', fatal=False)
  66         if login_page is False:
  67             return
  68
  69         galx = self._search_regex(r'(?s)<input.+?name="GALX".+?value="(.+?)"',
  70                                   login_page, u'Login GALX parameter')
  71
  72         # Log in
  73         login_form_strs = {
  74                 u'continue': u'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
  75                 u'Email': username,
  76                 u'GALX': galx,
  77                 u'Passwd': password,
  78                 u'PersistentCookie': u'yes',
  79                 u'_utf8': u'霱',
  80                 u'bgresponse': u'js_disabled',
  81                 u'checkConnection': u'',
  82                 u'checkedDomains': u'youtube',
  83                 u'dnConn': u'',
  84                 u'pstMsg': u'0',
  85                 u'rmShown': u'1',
  86                 u'secTok': u'',
  87                 u'signIn': u'Sign in',
  88                 u'timeStmp': u'',
  89                 u'service': u'youtube',
  90                 u'uilel': u'3',
  91                 u'hl': u'en_US',
  92         }
  93         # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
  94         # chokes on unicode
  95         login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k,v in login_form_strs.items())
  96         login_data = compat_urllib_parse.urlencode(login_form).encode('ascii')
  97
  98         req = compat_urllib_request.Request(self._LOGIN_URL, login_data)
  99         login_results = self._download_webpage(
 100             req, None,
 101             note=u'Logging in', errnote=u'unable to log in', fatal=False)
 102         if login_results is False:
 103             return False
 104         if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
 105             self._downloader.report_warning(u'unable to log in: bad username or password')
 106             return False
 107         return True
 108
 109     def _confirm_age(self):
 110         age_form = {
 111             'next_url': '/',
 112             'action_confirm': 'Confirm',
 113         }
 114         req = compat_urllib_request.Request(self._AGE_URL,
 115             compat_urllib_parse.urlencode(age_form).encode('ascii'))
 116
 117         self._download_webpage(
 118             req, None,
 119             note=u'Confirming age', errnote=u'Unable to confirm age')
 120         return True
 121
 122     def _real_initialize(self):
 123         if self._downloader is None:
 124             return
 125         if not self._set_language():
 126             return
 127         if not self._login():
 128             return
 129         self._confirm_age()
 130
 131
 132 class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
 133     IE_DESC = u'YouTube.com'
 134     _VALID_URL = r"""(?x)^
 135                      (
 136                          (?:https?://|//)?                                    # http(s):// or protocol-independent URL (optional)
 137                          (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|
 138                             (?:www\.)?deturl\.com/www\.youtube\.com/|
 139                             (?:www\.)?pwnyoutube\.com/|
 140                             (?:www\.)?yourepeat\.com/|
 141                             tube\.majestyc\.net/|
 142                             youtube\.googleapis\.com/)                        # the various hostnames, with wildcard subdomains
 143                          (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
 144                          (?:                                                  # the various things that can precede the ID:
 145                              (?:(?:v|embed|e)/)                               # v/ or embed/ or e/
 146                              |(?:                                             # or the v= param in all its forms
 147                                  (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)?  # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
 148                                  (?:\?|\#!?)                                  # the params delimiter ? or # or #!
 149                                  (?:.*?&)?                                    # any other preceding param (like /?s=tuff&v=xxxx)
 150                                  v=
 151                              )
 152                          ))
 153                          |youtu\.be/                                          # just youtu.be/xxxx
 154                          )
 155                      )?                                                       # all until now is optional -> you can pass the naked ID
 156                      ([0-9A-Za-z_-]{11})                                      # here is it! the YouTube video ID
 157                      (?(1).+)?                                                # if we found the ID, everything can follow
 158                      $"""
 159     _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
 160     _formats = {
 161         '5': {'ext': 'flv', 'width': 400, 'height': 240},
 162         '6': {'ext': 'flv', 'width': 450, 'height': 270},
 163         '13': {'ext': '3gp'},
 164         '17': {'ext': '3gp', 'width': 176, 'height': 144},
 165         '18': {'ext': 'mp4', 'width': 640, 'height': 360},
 166         '22': {'ext': 'mp4', 'width': 1280, 'height': 720},
 167         '34': {'ext': 'flv', 'width': 640, 'height': 360},
 168         '35': {'ext': 'flv', 'width': 854, 'height': 480},
 169         '36': {'ext': '3gp', 'width': 320, 'height': 240},
 170         '37': {'ext': 'mp4', 'width': 1920, 'height': 1080},
 171         '38': {'ext': 'mp4', 'width': 4096, 'height': 3072},
 172         '43': {'ext': 'webm', 'width': 640, 'height': 360},
 173         '44': {'ext': 'webm', 'width': 854, 'height': 480},
 174         '45': {'ext': 'webm', 'width': 1280, 'height': 720},
 175         '46': {'ext': 'webm', 'width': 1920, 'height': 1080},
 176
 177
 178         # 3d videos
 179         '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'preference': -20},
 180         '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'preference': -20},
 181         '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'preference': -20},
 182         '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'preference': -20},
 183         '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'preference': -20},
 184         '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'preference': -20},
 185         '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'preference': -20},
 186
 187         # Apple HTTP Live Streaming
 188         '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'preference': -10},
 189         '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'preference': -10},
 190         '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'preference': -10},
 191         '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'preference': -10},
 192         '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'preference': -10},
 193         '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'preference': -10},
 194         '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'preference': -10},
 195
 196         # DASH mp4 video
 197         '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
 198         '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
 199         '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
 200         '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
 201         '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
 202         '138': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
 203         '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
 204         '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
 205
 206         # Dash mp4 audio
 207         '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 48, 'preference': -50},
 208         '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 128, 'preference': -50},
 209         '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 256, 'preference': -50},
 210
 211         # Dash webm
 212         '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40},
 213         '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40},
 214         '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40},
 215         '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40},
 216         '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40},
 217         '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40},
 218         '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH webm', 'preference': -40},
 219         '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH webm', 'preference': -40},
 220         '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH webm', 'preference': -40},
 221         '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH webm', 'preference': -40},
 222         '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH webm', 'preference': -40},
 223         '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH webm', 'preference': -40},
 224         '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH webm', 'preference': -40},
 225
 226         # Dash webm audio
 227         '171': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH webm audio', 'abr': 48, 'preference': -50},
 228         '172': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH webm audio', 'abr': 256, 'preference': -50},
 229
 230         # RTMP (unnamed)
 231         '_rtmp': {'protocol': 'rtmp'},
 232     }
 233
 234     IE_NAME = u'youtube'
 235     _TESTS = [
 236         {
 237             u"url":  u"http://www.youtube.com/watch?v=BaW_jenozKc",
 238             u"file":  u"BaW_jenozKc.mp4",
 239             u"info_dict": {
 240                 u"title": u"youtube-dl test video \"'/\\ä↭𝕐",
 241                 u"uploader": u"Philipp Hagemeister",
 242                 u"uploader_id": u"phihag",
 243                 u"upload_date": u"20121002",
 244                 u"description": u"test chars:  \"'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de ."
 245             }
 246         },
 247         {
 248             u"url":  u"http://www.youtube.com/watch?v=UxxajLWwzqY",
 249             u"file":  u"UxxajLWwzqY.mp4",
 250             u"note": u"Test generic use_cipher_signature video (#897)",
 251             u"info_dict": {
 252                 u"upload_date": u"20120506",
 253                 u"title": u"Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]",
 254                 u"description": u"md5:5b292926389560516e384ac437c0ec07",
 255                 u"uploader": u"Icona Pop",
 256                 u"uploader_id": u"IconaPop"
 257             }
 258         },
 259         {
 260             u"url":  u"https://www.youtube.com/watch?v=07FYdnEawAQ",
 261             u"file":  u"07FYdnEawAQ.mp4",
 262             u"note": u"Test VEVO video with age protection (#956)",
 263             u"info_dict": {
 264                 u"upload_date": u"20130703",
 265                 u"title": u"Justin Timberlake - Tunnel Vision (Explicit)",
 266                 u"description": u"md5:64249768eec3bc4276236606ea996373",
 267                 u"uploader": u"justintimberlakeVEVO",
 268                 u"uploader_id": u"justintimberlakeVEVO"
 269             }
 270         },
 271         {
 272             u"url":  u"//www.YouTube.com/watch?v=yZIXLfi8CZQ",
 273             u"file":  u"yZIXLfi8CZQ.mp4",
 274             u"note": u"Embed-only video (#1746)",
 275             u"info_dict": {
 276                 u"upload_date": u"20120608",
 277                 u"title": u"Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012",
 278                 u"description": u"md5:09b78bd971f1e3e289601dfba15ca4f7",
 279                 u"uploader": u"SET India",
 280                 u"uploader_id": u"setindia"
 281             }
 282         },
 283         {
 284             u"url": u"http://www.youtube.com/watch?v=a9LDPn-MO4I",
 285             u"file": u"a9LDPn-MO4I.m4a",
 286             u"note": u"256k DASH audio (format 141) via DASH manifest",
 287             u"info_dict": {
 288                 u"upload_date": "20121002",
 289                 u"uploader_id": "8KVIDEO",
 290                 u"description": "No description available.",
 291                 u"uploader": "8KVIDEO",
 292                 u"title": "UHDTV TEST 8K VIDEO.mp4"
 293             },
 294             u"params": {
 295                 u"youtube_include_dash_manifest": True,
 296                 u"format": "141",
 297             },
 298         },
 299         # DASH manifest with encrypted signature
 300         {
 301             u'url': u'https://www.youtube.com/watch?v=IB3lcPjvWLA',
 302             u'info_dict': {
 303                 u'id': u'IB3lcPjvWLA',
 304                 u'ext': u'm4a',
 305                 u'title': u'Afrojack - The Spark ft. Spree Wilson',
 306                 u'description': u'md5:3199ed45ee8836572865580804d7ac0f',
 307                 u'uploader': u'AfrojackVEVO',
 308                 u'uploader_id': u'AfrojackVEVO',
 309                 u'upload_date': u'20131011',
 310             },
 311             u"params": {
 312                 u'youtube_include_dash_manifest': True,
 313                 u'format': '141',
 314             },
 315         },
 316     ]
 317
 318
 319     @classmethod
 320     def suitable(cls, url):
 321         """Receives a URL and returns True if suitable for this IE."""
 322         if YoutubePlaylistIE.suitable(url): return False
 323         return re.match(cls._VALID_URL, url) is not None
 324
 325     def __init__(self, *args, **kwargs):
 326         super(YoutubeIE, self).__init__(*args, **kwargs)
 327         self._player_cache = {}
 328
 329     def report_video_info_webpage_download(self, video_id):
 330         """Report attempt to download video info webpage."""
 331         self.to_screen(u'%s: Downloading video info webpage' % video_id)
 332
 333     def report_information_extraction(self, video_id):
 334         """Report attempt to extract video information."""
 335         self.to_screen(u'%s: Extracting video information' % video_id)
 336
 337     def report_unavailable_format(self, video_id, format):
 338         """Report extracted video URL."""
 339         self.to_screen(u'%s: Format %s not available' % (video_id, format))
 340
 341     def report_rtmp_download(self):
 342         """Indicate the download will use the RTMP protocol."""
 343         self.to_screen(u'RTMP download detected')
 344
 345     def _extract_signature_function(self, video_id, player_url, slen):
 346         id_m = re.match(r'.*-(?P<id>[a-zA-Z0-9_-]+)\.(?P<ext>[a-z]+)$',
 347                         player_url)
 348         player_type = id_m.group('ext')
 349         player_id = id_m.group('id')
 350
 351         # Read from filesystem cache
 352         func_id = '%s_%s_%d' % (player_type, player_id, slen)
 353         assert os.path.basename(func_id) == func_id
 354         cache_dir = get_cachedir(self._downloader.params)
 355
 356         cache_enabled = cache_dir is not None
 357         if cache_enabled:
 358             cache_fn = os.path.join(os.path.expanduser(cache_dir),
 359                                     u'youtube-sigfuncs',
 360                                     func_id + '.json')
 361             try:
 362                 with io.open(cache_fn, 'r', encoding='utf-8') as cachef:
 363                     cache_spec = json.load(cachef)
 364                 return lambda s: u''.join(s[i] for i in cache_spec)
 365             except IOError:
 366                 pass  # No cache available
 367
 368         if player_type == 'js':
 369             code = self._download_webpage(
 370                 player_url, video_id,
 371                 note=u'Downloading %s player %s' % (player_type, player_id),
 372                 errnote=u'Download of %s failed' % player_url)
 373             res = self._parse_sig_js(code)
 374         elif player_type == 'swf':
 375             urlh = self._request_webpage(
 376                 player_url, video_id,
 377                 note=u'Downloading %s player %s' % (player_type, player_id),
 378                 errnote=u'Download of %s failed' % player_url)
 379             code = urlh.read()
 380             res = self._parse_sig_swf(code)
 381         else:
 382             assert False, 'Invalid player type %r' % player_type
 383
 384         if cache_enabled:
 385             try:
 386                 test_string = u''.join(map(compat_chr, range(slen)))
 387                 cache_res = res(test_string)
 388                 cache_spec = [ord(c) for c in cache_res]
 389                 try:
 390                     os.makedirs(os.path.dirname(cache_fn))
 391                 except OSError as ose:
 392                     if ose.errno != errno.EEXIST:
 393                         raise
 394                 write_json_file(cache_spec, cache_fn)
 395             except Exception:
 396                 tb = traceback.format_exc()
 397                 self._downloader.report_warning(
 398                     u'Writing cache to %r failed: %s' % (cache_fn, tb))
 399
 400         return res
 401
 402     def _print_sig_code(self, func, slen):
 403         def gen_sig_code(idxs):
 404             def _genslice(start, end, step):
 405                 starts = u'' if start == 0 else str(start)
 406                 ends = (u':%d' % (end+step)) if end + step >= 0 else u':'
 407                 steps = u'' if step == 1 else (u':%d' % step)
 408                 return u's[%s%s%s]' % (starts, ends, steps)
 409
 410             step = None
 411             start = '(Never used)'  # Quelch pyflakes warnings - start will be
 412                                     # set as soon as step is set
 413             for i, prev in zip(idxs[1:], idxs[:-1]):
 414                 if step is not None:
 415                     if i - prev == step:
 416                         continue
 417                     yield _genslice(start, prev, step)
 418                     step = None
 419                     continue
 420                 if i - prev in [-1, 1]:
 421                     step = i - prev
 422                     start = prev
 423                     continue
 424                 else:
 425                     yield u's[%d]' % prev
 426             if step is None:
 427                 yield u's[%d]' % i
 428             else:
 429                 yield _genslice(start, i, step)
 430
 431         test_string = u''.join(map(compat_chr, range(slen)))
 432         cache_res = func(test_string)
 433         cache_spec = [ord(c) for c in cache_res]
 434         expr_code = u' + '.join(gen_sig_code(cache_spec))
 435         code = u'if len(s) == %d:\n    return %s\n' % (slen, expr_code)
 436         self.to_screen(u'Extracted signature function:\n' + code)
 437
 438     def _parse_sig_js(self, jscode):
 439         funcname = self._search_regex(
 440             r'signature=([a-zA-Z]+)', jscode,
 441              u'Initial JS player signature function name')
 442
 443         jsi = JSInterpreter(jscode)
 444         initial_function = jsi.extract_function(funcname)
 445         return lambda s: initial_function([s])
 446
 447     def _parse_sig_swf(self, file_contents):
 448         if file_contents[1:3] != b'WS':
 449             raise ExtractorError(
 450                 u'Not an SWF file; header is %r' % file_contents[:3])
 451         if file_contents[:1] == b'C':
 452             content = zlib.decompress(file_contents[8:])
 453         else:
 454             raise NotImplementedError(u'Unsupported compression format %r' %
 455                                       file_contents[:1])
 456
 457         def extract_tags(content):
 458             pos = 0
 459             while pos < len(content):
 460                 header16 = struct.unpack('<H', content[pos:pos+2])[0]
 461                 pos += 2
 462                 tag_code = header16 >> 6
 463                 tag_len = header16 & 0x3f
 464                 if tag_len == 0x3f:
 465                     tag_len = struct.unpack('<I', content[pos:pos+4])[0]
 466                     pos += 4
 467                 assert pos+tag_len <= len(content)
 468                 yield (tag_code, content[pos:pos+tag_len])
 469                 pos += tag_len
 470
 471         code_tag = next(tag
 472                         for tag_code, tag in extract_tags(content)
 473                         if tag_code == 82)
 474         p = code_tag.index(b'\0', 4) + 1
 475         code_reader = io.BytesIO(code_tag[p:])
 476
 477         # Parse ABC (AVM2 ByteCode)
 478         def read_int(reader=None):
 479             if reader is None:
 480                 reader = code_reader
 481             res = 0
 482             shift = 0
 483             for _ in range(5):
 484                 buf = reader.read(1)
 485                 assert len(buf) == 1
 486                 b = struct.unpack('<B', buf)[0]
 487                 res = res | ((b & 0x7f) << shift)
 488                 if b & 0x80 == 0:
 489                     break
 490                 shift += 7
 491             return res
 492
 493         def u30(reader=None):
 494             res = read_int(reader)
 495             assert res & 0xf0000000 == 0
 496             return res
 497         u32 = read_int
 498
 499         def s32(reader=None):
 500             v = read_int(reader)
 501             if v & 0x80000000 != 0:
 502                 v = - ((v ^ 0xffffffff) + 1)
 503             return v
 504
 505         def read_string(reader=None):
 506             if reader is None:
 507                 reader = code_reader
 508             slen = u30(reader)
 509             resb = reader.read(slen)
 510             assert len(resb) == slen
 511             return resb.decode('utf-8')
 512
 513         def read_bytes(count, reader=None):
 514             if reader is None:
 515                 reader = code_reader
 516             resb = reader.read(count)
 517             assert len(resb) == count
 518             return resb
 519
 520         def read_byte(reader=None):
 521             resb = read_bytes(1, reader=reader)
 522             res = struct.unpack('<B', resb)[0]
 523             return res
 524
 525         # minor_version + major_version
 526         read_bytes(2 + 2)
 527
 528         # Constant pool
 529         int_count = u30()
 530         for _c in range(1, int_count):
 531             s32()
 532         uint_count = u30()
 533         for _c in range(1, uint_count):
 534             u32()
 535         double_count = u30()
 536         read_bytes((double_count-1) * 8)
 537         string_count = u30()
 538         constant_strings = [u'']
 539         for _c in range(1, string_count):
 540             s = read_string()
 541             constant_strings.append(s)
 542         namespace_count = u30()
 543         for _c in range(1, namespace_count):
 544             read_bytes(1)  # kind
 545             u30()  # name
 546         ns_set_count = u30()
 547         for _c in range(1, ns_set_count):
 548             count = u30()
 549             for _c2 in range(count):
 550                 u30()
 551         multiname_count = u30()
 552         MULTINAME_SIZES = {
 553             0x07: 2,  # QName
 554             0x0d: 2,  # QNameA
 555             0x0f: 1,  # RTQName
 556             0x10: 1,  # RTQNameA
 557             0x11: 0,  # RTQNameL
 558             0x12: 0,  # RTQNameLA
 559             0x09: 2,  # Multiname
 560             0x0e: 2,  # MultinameA
 561             0x1b: 1,  # MultinameL
 562             0x1c: 1,  # MultinameLA
 563         }
 564         multinames = [u'']
 565         for _c in range(1, multiname_count):
 566             kind = u30()
 567             assert kind in MULTINAME_SIZES, u'Invalid multiname kind %r' % kind
 568             if kind == 0x07:
 569                 u30()  # namespace_idx
 570                 name_idx = u30()
 571                 multinames.append(constant_strings[name_idx])
 572             else:
 573                 multinames.append('[MULTINAME kind: %d]' % kind)
 574                 for _c2 in range(MULTINAME_SIZES[kind]):
 575                     u30()
 576
 577         # Methods
 578         method_count = u30()
 579         MethodInfo = collections.namedtuple(
 580             'MethodInfo',
 581             ['NEED_ARGUMENTS', 'NEED_REST'])
 582         method_infos = []
 583         for method_id in range(method_count):
 584             param_count = u30()
 585             u30()  # return type
 586             for _ in range(param_count):
 587                 u30()  # param type
 588             u30()  # name index (always 0 for youtube)
 589             flags = read_byte()
 590             if flags & 0x08 != 0:
 591                 # Options present
 592                 option_count = u30()
 593                 for c in range(option_count):
 594                     u30()  # val
 595                     read_bytes(1)  # kind
 596             if flags & 0x80 != 0:
 597                 # Param names present
 598                 for _ in range(param_count):
 599                     u30()  # param name
 600             mi = MethodInfo(flags & 0x01 != 0, flags & 0x04 != 0)
 601             method_infos.append(mi)
 602
 603         # Metadata
 604         metadata_count = u30()
 605         for _c in range(metadata_count):
 606             u30()  # name
 607             item_count = u30()
 608             for _c2 in range(item_count):
 609                 u30()  # key
 610                 u30()  # value
 611
 612         def parse_traits_info():
 613             trait_name_idx = u30()
 614             kind_full = read_byte()
 615             kind = kind_full & 0x0f
 616             attrs = kind_full >> 4
 617             methods = {}
 618             if kind in [0x00, 0x06]:  # Slot or Const
 619                 u30()  # Slot id
 620                 u30()  # type_name_idx
 621                 vindex = u30()
 622                 if vindex != 0:
 623                     read_byte()  # vkind
 624             elif kind in [0x01, 0x02, 0x03]:  # Method / Getter / Setter
 625                 u30()  # disp_id
 626                 method_idx = u30()
 627                 methods[multinames[trait_name_idx]] = method_idx
 628             elif kind == 0x04:  # Class
 629                 u30()  # slot_id
 630                 u30()  # classi
 631             elif kind == 0x05:  # Function
 632                 u30()  # slot_id
 633                 function_idx = u30()
 634                 methods[function_idx] = multinames[trait_name_idx]
 635             else:
 636                 raise ExtractorError(u'Unsupported trait kind %d' % kind)
 637
 638             if attrs & 0x4 != 0:  # Metadata present
 639                 metadata_count = u30()
 640                 for _c3 in range(metadata_count):
 641                     u30()  # metadata index
 642
 643             return methods
 644
 645         # Classes
 646         TARGET_CLASSNAME = u'SignatureDecipher'
 647         searched_idx = multinames.index(TARGET_CLASSNAME)
 648         searched_class_id = None
 649         class_count = u30()
 650         for class_id in range(class_count):
 651             name_idx = u30()
 652             if name_idx == searched_idx:
 653                 # We found the class we're looking for!
 654                 searched_class_id = class_id
 655             u30()  # super_name idx
 656             flags = read_byte()
 657             if flags & 0x08 != 0:  # Protected namespace is present
 658                 u30()  # protected_ns_idx
 659             intrf_count = u30()
 660             for _c2 in range(intrf_count):
 661                 u30()
 662             u30()  # iinit
 663             trait_count = u30()
 664             for _c2 in range(trait_count):
 665                 parse_traits_info()
 666
 667         if searched_class_id is None:
 668             raise ExtractorError(u'Target class %r not found' %
 669                                  TARGET_CLASSNAME)
 670
 671         method_names = {}
 672         method_idxs = {}
 673         for class_id in range(class_count):
 674             u30()  # cinit
 675             trait_count = u30()
 676             for _c2 in range(trait_count):
 677                 trait_methods = parse_traits_info()
 678                 if class_id == searched_class_id:
 679                     method_names.update(trait_methods.items())
 680                     method_idxs.update(dict(
 681                         (idx, name)
 682                         for name, idx in trait_methods.items()))
 683
 684         # Scripts
 685         script_count = u30()
 686         for _c in range(script_count):
 687             u30()  # init
 688             trait_count = u30()
 689             for _c2 in range(trait_count):
 690                 parse_traits_info()
 691
 692         # Method bodies
 693         method_body_count = u30()
 694         Method = collections.namedtuple('Method', ['code', 'local_count'])
 695         methods = {}
 696         for _c in range(method_body_count):
 697             method_idx = u30()
 698             u30()  # max_stack
 699             local_count = u30()
 700             u30()  # init_scope_depth
 701             u30()  # max_scope_depth
 702             code_length = u30()
 703             code = read_bytes(code_length)
 704             if method_idx in method_idxs:
 705                 m = Method(code, local_count)
 706                 methods[method_idxs[method_idx]] = m
 707             exception_count = u30()
 708             for _c2 in range(exception_count):
 709                 u30()  # from
 710                 u30()  # to
 711                 u30()  # target
 712                 u30()  # exc_type
 713                 u30()  # var_name
 714             trait_count = u30()
 715             for _c2 in range(trait_count):
 716                 parse_traits_info()
 717
 718         assert p + code_reader.tell() == len(code_tag)
 719         assert len(methods) == len(method_idxs)
 720
 721         method_pyfunctions = {}
 722
 723         def extract_function(func_name):
 724             if func_name in method_pyfunctions:
 725                 return method_pyfunctions[func_name]
 726             if func_name not in methods:
 727                 raise ExtractorError(u'Cannot find function %r' % func_name)
 728             m = methods[func_name]
 729
 730             def resfunc(args):
 731                 registers = ['(this)'] + list(args) + [None] * m.local_count
 732                 stack = []
 733                 coder = io.BytesIO(m.code)
 734                 while True:
 735                     opcode = struct.unpack('!B', coder.read(1))[0]
 736                     if opcode == 36:  # pushbyte
 737                         v = struct.unpack('!B', coder.read(1))[0]
 738                         stack.append(v)
 739                     elif opcode == 44:  # pushstring
 740                         idx = u30(coder)
 741                         stack.append(constant_strings[idx])
 742                     elif opcode == 48:  # pushscope
 743                         # We don't implement the scope register, so we'll just
 744                         # ignore the popped value
 745                         stack.pop()
 746                     elif opcode == 70:  # callproperty
 747                         index = u30(coder)
 748                         mname = multinames[index]
 749                         arg_count = u30(coder)
 750                         args = list(reversed(
 751                             [stack.pop() for _ in range(arg_count)]))
 752                         obj = stack.pop()
 753                         if mname == u'split':
 754                             assert len(args) == 1
 755                             assert isinstance(args[0], compat_str)
 756                             assert isinstance(obj, compat_str)
 757                             if args[0] == u'':
 758                                 res = list(obj)
 759                             else:
 760                                 res = obj.split(args[0])
 761                             stack.append(res)
 762                         elif mname == u'slice':
 763                             assert len(args) == 1
 764                             assert isinstance(args[0], int)
 765                             assert isinstance(obj, list)
 766                             res = obj[args[0]:]
 767                             stack.append(res)
 768                         elif mname == u'join':
 769                             assert len(args) == 1
 770                             assert isinstance(args[0], compat_str)
 771                             assert isinstance(obj, list)
 772                             res = args[0].join(obj)
 773                             stack.append(res)
 774                         elif mname in method_pyfunctions:
 775                             stack.append(method_pyfunctions[mname](args))
 776                         else:
 777                             raise NotImplementedError(
 778                                 u'Unsupported property %r on %r'
 779                                 % (mname, obj))
 780                     elif opcode == 72:  # returnvalue
 781                         res = stack.pop()
 782                         return res
 783                     elif opcode == 79:  # callpropvoid
 784                         index = u30(coder)
 785                         mname = multinames[index]
 786                         arg_count = u30(coder)
 787                         args = list(reversed(
 788                             [stack.pop() for _ in range(arg_count)]))
 789                         obj = stack.pop()
 790                         if mname == u'reverse':
 791                             assert isinstance(obj, list)
 792                             obj.reverse()
 793                         else:
 794                             raise NotImplementedError(
 795                                 u'Unsupported (void) property %r on %r'
 796                                 % (mname, obj))
 797                     elif opcode == 93:  # findpropstrict
 798                         index = u30(coder)
 799                         mname = multinames[index]
 800                         res = extract_function(mname)
 801                         stack.append(res)
 802                     elif opcode == 97:  # setproperty
 803                         index = u30(coder)
 804                         value = stack.pop()
 805                         idx = stack.pop()
 806                         obj = stack.pop()
 807                         assert isinstance(obj, list)
 808                         assert isinstance(idx, int)
 809                         obj[idx] = value
 810                     elif opcode == 98:  # getlocal
 811                         index = u30(coder)
 812                         stack.append(registers[index])
 813                     elif opcode == 99:  # setlocal
 814                         index = u30(coder)
 815                         value = stack.pop()
 816                         registers[index] = value
 817                     elif opcode == 102:  # getproperty
 818                         index = u30(coder)
 819                         pname = multinames[index]
 820                         if pname == u'length':
 821                             obj = stack.pop()
 822                             assert isinstance(obj, list)
 823                             stack.append(len(obj))
 824                         else:  # Assume attribute access
 825                             idx = stack.pop()
 826                             assert isinstance(idx, int)
 827                             obj = stack.pop()
 828                             assert isinstance(obj, list)
 829                             stack.append(obj[idx])
 830                     elif opcode == 128:  # coerce
 831                         u30(coder)
 832                     elif opcode == 133:  # coerce_s
 833                         assert isinstance(stack[-1], (type(None), compat_str))
 834                     elif opcode == 164:  # modulo
 835                         value2 = stack.pop()
 836                         value1 = stack.pop()
 837                         res = value1 % value2
 838                         stack.append(res)
 839                     elif opcode == 208:  # getlocal_0
 840                         stack.append(registers[0])
 841                     elif opcode == 209:  # getlocal_1
 842                         stack.append(registers[1])
 843                     elif opcode == 210:  # getlocal_2
 844                         stack.append(registers[2])
 845                     elif opcode == 211:  # getlocal_3
 846                         stack.append(registers[3])
 847                     elif opcode == 214:  # setlocal_2
 848                         registers[2] = stack.pop()
 849                     elif opcode == 215:  # setlocal_3
 850                         registers[3] = stack.pop()
 851                     else:
 852                         raise NotImplementedError(
 853                             u'Unsupported opcode %d' % opcode)
 854
 855             method_pyfunctions[func_name] = resfunc
 856             return resfunc
 857
 858         initial_function = extract_function(u'decipher')
 859         return lambda s: initial_function([s])
 860
 861     def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
 862         """Turn the encrypted s field into a working signature"""
 863
 864         if player_url is not None:
 865             if player_url.startswith(u'//'):
 866                 player_url = u'https:' + player_url
 867             try:
 868                 player_id = (player_url, len(s))
 869                 if player_id not in self._player_cache:
 870                     func = self._extract_signature_function(
 871                         video_id, player_url, len(s)
 872                     )
 873                     self._player_cache[player_id] = func
 874                 func = self._player_cache[player_id]
 875                 if self._downloader.params.get('youtube_print_sig_code'):
 876                     self._print_sig_code(func, len(s))
 877                 return func(s)
 878             except Exception:
 879                 tb = traceback.format_exc()
 880                 self._downloader.report_warning(
 881                     u'Automatic signature extraction failed: ' + tb)
 882
 883             self._downloader.report_warning(
 884                 u'Warning: Falling back to static signature algorithm')
 885
 886         return self._static_decrypt_signature(
 887             s, video_id, player_url, age_gate)
 888
 889     def _static_decrypt_signature(self, s, video_id, player_url, age_gate):
 890         if age_gate:
 891             # The videos with age protection use another player, so the
 892             # algorithms can be different.
 893             if len(s) == 86:
 894                 return s[2:63] + s[82] + s[64:82] + s[63]
 895
 896         if len(s) == 93:
 897             return s[86:29:-1] + s[88] + s[28:5:-1]
 898         elif len(s) == 92:
 899             return s[25] + s[3:25] + s[0] + s[26:42] + s[79] + s[43:79] + s[91] + s[80:83]
 900         elif len(s) == 91:
 901             return s[84:27:-1] + s[86] + s[26:5:-1]
 902         elif len(s) == 90:
 903             return s[25] + s[3:25] + s[2] + s[26:40] + s[77] + s[41:77] + s[89] + s[78:81]
 904         elif len(s) == 89:
 905             return s[84:78:-1] + s[87] + s[77:60:-1] + s[0] + s[59:3:-1]
 906         elif len(s) == 88:
 907             return s[7:28] + s[87] + s[29:45] + s[55] + s[46:55] + s[2] + s[56:87] + s[28]
 908         elif len(s) == 87:
 909             return s[6:27] + s[4] + s[28:39] + s[27] + s[40:59] + s[2] + s[60:]
 910         elif len(s) == 86:
 911             return s[80:72:-1] + s[16] + s[71:39:-1] + s[72] + s[38:16:-1] + s[82] + s[15::-1]
 912         elif len(s) == 85:
 913             return s[3:11] + s[0] + s[12:55] + s[84] + s[56:84]
 914         elif len(s) == 84:
 915             return s[78:70:-1] + s[14] + s[69:37:-1] + s[70] + s[36:14:-1] + s[80] + s[:14][::-1]
 916         elif len(s) == 83:
 917             return s[80:63:-1] + s[0] + s[62:0:-1] + s[63]
 918         elif len(s) == 82:
 919             return s[80:37:-1] + s[7] + s[36:7:-1] + s[0] + s[6:0:-1] + s[37]
 920         elif len(s) == 81:
 921             return s[56] + s[79:56:-1] + s[41] + s[55:41:-1] + s[80] + s[40:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
 922         elif len(s) == 80:
 923             return s[1:19] + s[0] + s[20:68] + s[19] + s[69:80]
 924         elif len(s) == 79:
 925             return s[54] + s[77:54:-1] + s[39] + s[53:39:-1] + s[78] + s[38:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
 926
 927         else:
 928             raise ExtractorError(u'Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s)))
 929
 930     def _get_available_subtitles(self, video_id, webpage):
 931         try:
 932             sub_list = self._download_webpage(
 933                 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
 934                 video_id, note=False)
 935         except ExtractorError as err:
 936             self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err))
 937             return {}
 938         lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list)
 939
 940         sub_lang_list = {}
 941         for l in lang_list:
 942             lang = l[1]
 943             params = compat_urllib_parse.urlencode({
 944                 'lang': lang,
 945                 'v': video_id,
 946                 'fmt': self._downloader.params.get('subtitlesformat', 'srt'),
 947                 'name': unescapeHTML(l[0]).encode('utf-8'),
 948             })
 949             url = u'https://www.youtube.com/api/timedtext?' + params
 950             sub_lang_list[lang] = url
 951         if not sub_lang_list:
 952             self._downloader.report_warning(u'video doesn\'t have subtitles')
 953             return {}
 954         return sub_lang_list
 955
 956     def _get_available_automatic_caption(self, video_id, webpage):
 957         """We need the webpage for getting the captions url, pass it as an
 958            argument to speed up the process."""
 959         sub_format = self._downloader.params.get('subtitlesformat', 'srt')
 960         self.to_screen(u'%s: Looking for automatic captions' % video_id)
 961         mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
 962         err_msg = u'Couldn\'t find automatic captions for %s' % video_id
 963         if mobj is None:
 964             self._downloader.report_warning(err_msg)
 965             return {}
 966         player_config = json.loads(mobj.group(1))
 967         try:
 968             args = player_config[u'args']
 969             caption_url = args[u'ttsurl']
 970             timestamp = args[u'timestamp']
 971             # We get the available subtitles
 972             list_params = compat_urllib_parse.urlencode({
 973                 'type': 'list',
 974                 'tlangs': 1,
 975                 'asrs': 1,
 976             })
 977             list_url = caption_url + '&' + list_params
 978             caption_list = self._download_xml(list_url, video_id)
 979             original_lang_node = caption_list.find('track')
 980             if original_lang_node is None or original_lang_node.attrib.get('kind') != 'asr' :
 981                 self._downloader.report_warning(u'Video doesn\'t have automatic captions')
 982                 return {}
 983             original_lang = original_lang_node.attrib['lang_code']
 984
 985             sub_lang_list = {}
 986             for lang_node in caption_list.findall('target'):
 987                 sub_lang = lang_node.attrib['lang_code']
 988                 params = compat_urllib_parse.urlencode({
 989                     'lang': original_lang,
 990                     'tlang': sub_lang,
 991                     'fmt': sub_format,
 992                     'ts': timestamp,
 993                     'kind': 'asr',
 994                 })
 995                 sub_lang_list[sub_lang] = caption_url + '&' + params
 996             return sub_lang_list
 997         # An extractor error can be raise by the download process if there are
 998         # no automatic captions but there are subtitles
 999         except (KeyError, ExtractorError):
1000             self._downloader.report_warning(err_msg)
1001             return {}
1002
1003     @classmethod
1004     def extract_id(cls, url):
1005         mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
1006         if mobj is None:
1007             raise ExtractorError(u'Invalid URL: %s' % url)
1008         video_id = mobj.group(2)
1009         return video_id
1010
1011     def _extract_from_m3u8(self, manifest_url, video_id):
1012         url_map = {}
1013         def _get_urls(_manifest):
1014             lines = _manifest.split('\n')
1015             urls = filter(lambda l: l and not l.startswith('#'),
1016                             lines)
1017             return urls
1018         manifest = self._download_webpage(manifest_url, video_id, u'Downloading formats manifest')
1019         formats_urls = _get_urls(manifest)
1020         for format_url in formats_urls:
1021             itag = self._search_regex(r'itag/(\d+?)/', format_url, 'itag')
1022             url_map[itag] = format_url
1023         return url_map
1024
1025     def _extract_annotations(self, video_id):
1026         url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id
1027         return self._download_webpage(url, video_id, note=u'Searching for annotations.', errnote=u'Unable to download video annotations.')
1028
1029     def _real_extract(self, url):
1030         proto = (
1031             u'http' if self._downloader.params.get('prefer_insecure', False)
1032             else u'https')
1033
1034         # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1035         mobj = re.search(self._NEXT_URL_RE, url)
1036         if mobj:
1037             url = proto + '://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/')
1038         video_id = self.extract_id(url)
1039
1040         # Get video webpage
1041         url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
1042         video_webpage = self._download_webpage(url, video_id)
1043
1044         # Attempt to extract SWF player URL
1045         mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
1046         if mobj is not None:
1047             player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1048         else:
1049             player_url = None
1050
1051         # Get video info
1052         self.report_video_info_webpage_download(video_id)
1053         if re.search(r'player-age-gate-content">', video_webpage) is not None:
1054             self.report_age_confirmation()
1055             age_gate = True
1056             # We simulate the access to the video from www.youtube.com/v/{video_id}
1057             # this can be viewed without login into Youtube
1058             data = compat_urllib_parse.urlencode({'video_id': video_id,
1059                                                   'el': 'player_embedded',
1060                                                   'gl': 'US',
1061                                                   'hl': 'en',
1062                                                   'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1063                                                   'asv': 3,
1064                                                   'sts':'1588',
1065                                                   })
1066             video_info_url = proto + '://www.youtube.com/get_video_info?' + data
1067             video_info_webpage = self._download_webpage(video_info_url, video_id,
1068                                     note=False,
1069                                     errnote='unable to download video info webpage')
1070             video_info = compat_parse_qs(video_info_webpage)
1071         else:
1072             age_gate = False
1073             for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
1074                 video_info_url = (proto + '://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
1075                         % (video_id, el_type))
1076                 video_info_webpage = self._download_webpage(video_info_url, video_id,
1077                                         note=False,
1078                                         errnote='unable to download video info webpage')
1079                 video_info = compat_parse_qs(video_info_webpage)
1080                 if 'token' in video_info:
1081                     break
1082         if 'token' not in video_info:
1083             if 'reason' in video_info:
1084                 raise ExtractorError(u'YouTube said: %s' % video_info['reason'][0], expected=True)
1085             else:
1086                 raise ExtractorError(u'"token" parameter not in video info for unknown reason')
1087
1088         if 'view_count' in video_info:
1089             view_count = int(video_info['view_count'][0])
1090         else:
1091             view_count = None
1092
1093         # Check for "rental" videos
1094         if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
1095             raise ExtractorError(u'"rental" videos not supported')
1096
1097         # Start extracting information
1098         self.report_information_extraction(video_id)
1099
1100         # uploader
1101         if 'author' not in video_info:
1102             raise ExtractorError(u'Unable to extract uploader name')
1103         video_uploader = compat_urllib_parse.unquote_plus(video_info['author'][0])
1104
1105         # uploader_id
1106         video_uploader_id = None
1107         mobj = re.search(r'<link itemprop="url" href="http://www.youtube.com/(?:user|channel)/([^"]+)">', video_webpage)
1108         if mobj is not None:
1109             video_uploader_id = mobj.group(1)
1110         else:
1111             self._downloader.report_warning(u'unable to extract uploader nickname')
1112
1113         # title
1114         if 'title' in video_info:
1115             video_title = compat_urllib_parse.unquote_plus(video_info['title'][0])
1116         else:
1117             self._downloader.report_warning(u'Unable to extract video title')
1118             video_title = u'_'
1119
1120         # thumbnail image
1121         # We try first to get a high quality image:
1122         m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
1123                             video_webpage, re.DOTALL)
1124         if m_thumb is not None:
1125             video_thumbnail = m_thumb.group(1)
1126         elif 'thumbnail_url' not in video_info:
1127             self._downloader.report_warning(u'unable to extract video thumbnail')
1128             video_thumbnail = None
1129         else:   # don't panic if we can't find it
1130             video_thumbnail = compat_urllib_parse.unquote_plus(video_info['thumbnail_url'][0])
1131
1132         # upload date
1133         upload_date = None
1134         mobj = re.search(r'id="eow-date.*?>(.*?)</span>', video_webpage, re.DOTALL)
1135         if mobj is not None:
1136             upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
1137             upload_date = unified_strdate(upload_date)
1138
1139         # description
1140         video_description = get_element_by_id("eow-description", video_webpage)
1141         if video_description:
1142             video_description = re.sub(r'''(?x)
1143                 <a\s+
1144                     (?:[a-zA-Z-]+="[^"]+"\s+)*?
1145                     title="([^"]+)"\s+
1146                     (?:[a-zA-Z-]+="[^"]+"\s+)*?
1147                     class="yt-uix-redirect-link"\s*>
1148                 [^<]+
1149                 </a>
1150             ''', r'\1', video_description)
1151             video_description = clean_html(video_description)
1152         else:
1153             fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
1154             if fd_mobj:
1155                 video_description = unescapeHTML(fd_mobj.group(1))
1156             else:
1157                 video_description = u''
1158
1159         def _extract_count(klass):
1160             count = self._search_regex(
1161                 r'class="%s">([\d,]+)</span>' % re.escape(klass),
1162                 video_webpage, klass, default=None)
1163             if count is not None:
1164                 return int(count.replace(',', ''))
1165             return None
1166         like_count = _extract_count(u'likes-count')
1167         dislike_count = _extract_count(u'dislikes-count')
1168
1169         # subtitles
1170         video_subtitles = self.extract_subtitles(video_id, video_webpage)
1171
1172         if self._downloader.params.get('listsubtitles', False):
1173             self._list_available_subtitles(video_id, video_webpage)
1174             return
1175
1176         if 'length_seconds' not in video_info:
1177             self._downloader.report_warning(u'unable to extract video duration')
1178             video_duration = None
1179         else:
1180             video_duration = int(compat_urllib_parse.unquote_plus(video_info['length_seconds'][0]))
1181
1182         # annotations
1183         video_annotations = None
1184         if self._downloader.params.get('writeannotations', False):
1185                 video_annotations = self._extract_annotations(video_id)
1186
1187         # Decide which formats to download
1188         try:
1189             mobj = re.search(r';ytplayer\.config\s*=\s*({.*?});', video_webpage)
1190             if not mobj:
1191                 raise ValueError('Could not find vevo ID')
1192             json_code = uppercase_escape(mobj.group(1))
1193             ytplayer_config = json.loads(json_code)
1194             args = ytplayer_config['args']
1195             # Easy way to know if the 's' value is in url_encoded_fmt_stream_map
1196             # this signatures are encrypted
1197             if 'url_encoded_fmt_stream_map' not in args:
1198                 raise ValueError(u'No stream_map present')  # caught below
1199             re_signature = re.compile(r'[&,]s=')
1200             m_s = re_signature.search(args['url_encoded_fmt_stream_map'])
1201             if m_s is not None:
1202                 self.to_screen(u'%s: Encrypted signatures detected.' % video_id)
1203                 video_info['url_encoded_fmt_stream_map'] = [args['url_encoded_fmt_stream_map']]
1204             m_s = re_signature.search(args.get('adaptive_fmts', u''))
1205             if m_s is not None:
1206                 if 'adaptive_fmts' in video_info:
1207                     video_info['adaptive_fmts'][0] += ',' + args['adaptive_fmts']
1208                 else:
1209                     video_info['adaptive_fmts'] = [args['adaptive_fmts']]
1210         except ValueError:
1211             pass
1212
1213         def _map_to_format_list(urlmap):
1214             formats = []
1215             for itag, video_real_url in urlmap.items():
1216                 dct = {
1217                     'format_id': itag,
1218                     'url': video_real_url,
1219                     'player_url': player_url,
1220                 }
1221                 if itag in self._formats:
1222                     dct.update(self._formats[itag])
1223                 formats.append(dct)
1224             return formats
1225
1226         if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1227             self.report_rtmp_download()
1228             formats = [{
1229                 'format_id': '_rtmp',
1230                 'protocol': 'rtmp',
1231                 'url': video_info['conn'][0],
1232                 'player_url': player_url,
1233             }]
1234         elif len(video_info.get('url_encoded_fmt_stream_map', [])) >= 1 or len(video_info.get('adaptive_fmts', [])) >= 1:
1235             encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts',[''])[0]
1236             if 'rtmpe%3Dyes' in encoded_url_map:
1237                 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
1238             url_map = {}
1239             for url_data_str in encoded_url_map.split(','):
1240                 url_data = compat_parse_qs(url_data_str)
1241                 if 'itag' in url_data and 'url' in url_data:
1242                     url = url_data['url'][0]
1243                     if 'sig' in url_data:
1244                         url += '&signature=' + url_data['sig'][0]
1245                     elif 's' in url_data:
1246                         encrypted_sig = url_data['s'][0]
1247                         if self._downloader.params.get('verbose'):
1248                             if age_gate:
1249                                 if player_url is None:
1250                                     player_version = 'unknown'
1251                                 else:
1252                                     player_version = self._search_regex(
1253                                         r'-(.+)\.swf$', player_url,
1254                                         u'flash player', fatal=False)
1255                                 player_desc = 'flash player %s' % player_version
1256                             else:
1257                                 player_version = self._search_regex(
1258                                     r'html5player-(.+?)\.js', video_webpage,
1259                                     'html5 player', fatal=False)
1260                                 player_desc = u'html5 player %s' % player_version
1261
1262                             parts_sizes = u'.'.join(compat_str(len(part)) for part in encrypted_sig.split('.'))
1263                             self.to_screen(u'encrypted signature length %d (%s), itag %s, %s' %
1264                                 (len(encrypted_sig), parts_sizes, url_data['itag'][0], player_desc))
1265
1266                         if not age_gate:
1267                             jsplayer_url_json = self._search_regex(
1268                                 r'"assets":.+?"js":\s*("[^"]+")',
1269                                 video_webpage, u'JS player URL')
1270                             player_url = json.loads(jsplayer_url_json)
1271
1272                         signature = self._decrypt_signature(
1273                             encrypted_sig, video_id, player_url, age_gate)
1274                         url += '&signature=' + signature
1275                     if 'ratebypass' not in url:
1276                         url += '&ratebypass=yes'
1277                     url_map[url_data['itag'][0]] = url
1278             formats = _map_to_format_list(url_map)
1279         elif video_info.get('hlsvp'):
1280             manifest_url = video_info['hlsvp'][0]
1281             url_map = self._extract_from_m3u8(manifest_url, video_id)
1282             formats = _map_to_format_list(url_map)
1283         else:
1284             raise ExtractorError(u'no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
1285
1286         # Look for the DASH manifest
1287         if (self._downloader.params.get('youtube_include_dash_manifest', False)):
1288             try:
1289                 # The DASH manifest used needs to be the one from the original video_webpage.
1290                 # The one found in get_video_info seems to be using different signatures.
1291                 # However, in the case of an age restriction there won't be any embedded dashmpd in the video_webpage.
1292                 # Luckily, it seems, this case uses some kind of default signature (len == 86), so the
1293                 # combination of get_video_info and the _static_decrypt_signature() decryption fallback will work here.
1294                 if age_gate:
1295                     dash_manifest_url = video_info.get('dashmpd')[0]
1296                 else:
1297                     dash_manifest_url = ytplayer_config['args']['dashmpd']
1298                 def decrypt_sig(mobj):
1299                     s = mobj.group(1)
1300                     dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
1301                     return '/signature/%s' % dec_s
1302                 dash_manifest_url = re.sub(r'/s/([\w\.]+)', decrypt_sig, dash_manifest_url)
1303                 dash_doc = self._download_xml(
1304                     dash_manifest_url, video_id,
1305                     note=u'Downloading DASH manifest',
1306                     errnote=u'Could not download DASH manifest')
1307                 for r in dash_doc.findall(u'.//{urn:mpeg:DASH:schema:MPD:2011}Representation'):
1308                     url_el = r.find('{urn:mpeg:DASH:schema:MPD:2011}BaseURL')
1309                     if url_el is None:
1310                         continue
1311                     format_id = r.attrib['id']
1312                     video_url = url_el.text
1313                     filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength'))
1314                     f = {
1315                         'format_id': format_id,
1316                         'url': video_url,
1317                         'width': int_or_none(r.attrib.get('width')),
1318                         'tbr': int_or_none(r.attrib.get('bandwidth'), 1000),
1319                         'asr': int_or_none(r.attrib.get('audioSamplingRate')),
1320                         'filesize': filesize,
1321                     }
1322                     try:
1323                         existing_format = next(
1324                             fo for fo in formats
1325                             if fo['format_id'] == format_id)
1326                     except StopIteration:
1327                         f.update(self._formats.get(format_id, {}))
1328                         formats.append(f)
1329                     else:
1330                         existing_format.update(f)
1331
1332             except (ExtractorError, KeyError) as e:
1333                 self.report_warning(u'Skipping DASH manifest: %s' % e, video_id)
1334
1335         self._sort_formats(formats)
1336
1337         return {
1338             'id':           video_id,
1339             'uploader':     video_uploader,
1340             'uploader_id':  video_uploader_id,
1341             'upload_date':  upload_date,
1342             'title':        video_title,
1343             'thumbnail':    video_thumbnail,
1344             'description':  video_description,
1345             'subtitles':    video_subtitles,
1346             'duration':     video_duration,
1347             'age_limit':    18 if age_gate else 0,
1348             'annotations':  video_annotations,
1349             'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
1350             'view_count':   view_count,
1351             'like_count': like_count,
1352             'dislike_count': dislike_count,
1353             'formats':      formats,
1354         }
1355
class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
    """Extractor for YouTube playlists and mixes.

    The playlist id is taken either from a playlist-style URL (a ``p``,
    ``a`` or ``list`` query parameter, or a ``p/`` path) or from a bare id
    starting with PL, EC, UU, FL or RD. Ids starting with ``RD`` are handled
    by ``_extract_mix``; ids starting with ``TL`` are rejected with a hint to
    use the ``yttoplist`` keyword instead.
    """
    IE_DESC = u'YouTube.com playlists'
    _VALID_URL = r"""(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        youtube\.com/
                        (?:
                           (?:course|view_play_list|my_playlists|artist|playlist|watch)
                           \? (?:.*?&)*? (?:p|a|list)=
                        |  p/
                        )
                        (
                            (?:PL|EC|UU|FL|RD)?[0-9A-Za-z-_]{10,}
                            # Top tracks, they can also include dots
                            |(?:MC)[\w\.]*
                        )
                        .*
                     |
                        ((?:PL|EC|UU|FL|RD)[0-9A-Za-z-_]{10,})
                     )"""
    _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
    _MORE_PAGES_INDICATOR = r'data-link-type="next"'
    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&amp;[^"]*?index=(?P<index>\d+)'
    IE_NAME = u'youtube:playlist'

    def _real_initialize(self):
        """Run the login step before any extraction."""
        self._login()

    def _ids_to_results(self, ids):
        """Return one url_result per video id, each targeting the 'Youtube' extractor."""
        return [self.url_result(vid_id, 'Youtube', video_id=vid_id)
                       for vid_id in ids]

    def _extract_mix(self, playlist_id):
        """Build a playlist result for a mix (playlist id starting with 'RD').

        The watch page of the seed video is downloaded and the video ids of
        the entries that still have a username are collected from it.
        """
        # The mixes are generated from a single video
        # the id of the playlist is just 'RD' + video_id
        url = 'https://youtube.com/watch?v=%s&list=%s' % (playlist_id[-11:], playlist_id)
        webpage = self._download_webpage(url, playlist_id, u'Downloading Youtube mix')
        search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage)
        # Try the known title containers in order, first non-empty one wins
        title_span = (search_title('playlist-title') or
            search_title('title long-title') or search_title('title'))
        title = clean_html(title_span)
        video_re = r'''(?x)data-video-username="(.*?)".*?
                       href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id)
        matches = orderedSet(re.findall(video_re, webpage, flags=re.DOTALL))
        # Some of the videos may have been deleted, their username field is empty
        ids = [video_id for (username, video_id) in matches if username]
        url_results = self._ids_to_results(ids)

        return self.playlist_result(url_results, playlist_id, title)

    def _real_extract(self, url):
        """Return the playlist result for ``url``.

        If the URL also names a single video (``v`` parameter) and the
        'noplaylist' option is set, a url_result for just that video is
        returned instead.

        Raises ExtractorError when the URL does not match ``_VALID_URL`` or
        when the id belongs to a top list ('TL' prefix).
        """
        # Extract playlist id
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        playlist_id = mobj.group(1) or mobj.group(2)

        # Check if it's a video-specific URL
        query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
        if 'v' in query_dict:
            video_id = query_dict['v'][0]
            if self._downloader.params.get('noplaylist'):
                self.to_screen(u'Downloading just video %s because of --no-playlist' % video_id)
                return self.url_result(video_id, 'Youtube', video_id=video_id)
            else:
                # playlist_id already carries its own prefix (PL, RD, UU, ...),
                # so it is printed as-is.
                self.to_screen(u'Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))

        if playlist_id.startswith('RD'):
            # Mixes require a custom extraction process
            return self._extract_mix(playlist_id)
        if playlist_id.startswith('TL'):
            raise ExtractorError(u'For downloading YouTube.com top lists, use '
                u'the "yttoplist" keyword, for example "youtube-dl \'yttoplist:music:Top Tracks\'"', expected=True)

        url = self._TEMPLATE_URL % playlist_id
        page = self._download_webpage(url, playlist_id)
        # The first page serves both as the video listing and as the place
        # where the "load more" link is looked up.
        more_widget_html = content_html = page

        # Extract the video ids from the playlist pages
        ids = []

        for page_num in itertools.count(1):
            matches = re.finditer(self._VIDEO_RE, content_html)
            # We remove the duplicates and the link with index 0
            # (it's not the first video of the playlist)
            new_ids = orderedSet(m.group('id') for m in matches if m.group('index') != '0')
            ids.extend(new_ids)

            mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
            if not mobj:
                # No continuation link: this was the last page
                break

            more = self._download_json(
                'https://youtube.com/%s' % mobj.group('more'), playlist_id,
                'Downloading page #%s' % page_num,
                transform_source=uppercase_escape)
            content_html = more['content_html']
            more_widget_html = more['load_more_widget_html']

        # The title is read from the first page, not from the continuations
        playlist_title = self._html_search_regex(
            r'(?s)<h1 class="pl-header-title[^"]*">\s*(.*?)\s*</h1>',
            page, u'title')

        url_results = self._ids_to_results(ids)
        return self.playlist_result(url_results, playlist_id, playlist_title)
1461
1462
class YoutubeTopListIE(YoutubePlaylistIE):
    """Extractor for the ``yttoplist:{channel}:{list title}`` keyword.

    The channel page is searched for a link containing the url-encoded list
    title; the linked page is then downloaded until it yields video ids.
    """
    IE_NAME = u'youtube:toplist'
    IE_DESC = (u'YouTube.com top lists, "yttoplist:{channel}:{list title}"'
        u' (Example: "yttoplist:music:Top Tracks")')
    _VALID_URL = r'yttoplist:(?P<chann>.*?):(?P<title>.*?)$'

    def _real_extract(self, url):
        """Resolve the top list named in ``url`` and return it as a playlist."""
        match = re.match(self._VALID_URL, url)
        channel, title = match.group('chann', 'title')

        # Locate the link to the list on the channel page
        encoded_title = compat_urllib_parse.urlencode({'title': title})
        list_link_re = 'href="([^"]+?%s.*?)"' % re.escape(encoded_title)
        channel_page = self._download_webpage(
            'https://www.youtube.com/%s' % channel, title)
        link = self._html_search_regex(list_link_re, channel_page, u'list')
        list_url = compat_urlparse.urljoin('https://www.youtube.com/', link)

        video_re = r'data-index="\d+".*?data-video-id="([0-9A-Za-z_-]{11})"'
        # The page sometimes comes back without the videos, so it is
        # requested again until ids are found.
        # NOTE(review): there is no upper bound on the number of retries.
        attempt = 0
        while True:
            note = u'Downloading Youtube mix'
            if attempt > 0:
                note += ', retry #%d' % attempt
            webpage = self._download_webpage(list_url, title, note)
            ids = orderedSet(re.findall(video_re, webpage))
            if ids:
                break
            attempt += 1

        return self.playlist_result(
            self._ids_to_results(ids), playlist_title=title)
1493
1494
class YoutubeChannelIE(InfoExtractor):
    """Extractor for the videos listed under a ``/channel/<id>`` URL."""
    IE_DESC = u'YouTube.com channels'
    _VALID_URL = r"^(?:https?://)?(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/([0-9A-Za-z_-]+)"
    _MORE_PAGES_INDICATOR = 'yt-uix-load-more'
    _MORE_PAGES_URL = 'https://www.youtube.com/c4_browse_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s'
    IE_NAME = u'youtube:channel'

    def extract_videos_from_page(self, page):
        """Return the watch-link video ids found in ``page``.

        Ids are returned in order of first appearance, without duplicates.
        """
        seen = set()
        ordered_ids = []
        for match in re.finditer(r'href="/watch\?v=([0-9A-Za-z_-]+)&?', page):
            candidate = match.group(1)
            if candidate in seen:
                continue
            seen.add(candidate)
            ordered_ids.append(candidate)
        return ordered_ids

    def _real_extract(self, url):
        """Collect every video id of the channel and return them as a playlist."""
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        channel_id = mobj.group(1)

        # Fetch the channel's video listing page
        channel_page = self._download_webpage(
            'https://www.youtube.com/channel/%s/videos' % channel_id,
            channel_id)
        autogenerated_label = re.search(r'''(?x)
                class="[^"]*?(?:
                    channel-header-autogenerated-label|
                    yt-channel-title-autogenerated
                )[^"]*"''', channel_page)

        if autogenerated_label is not None:
            # Autogenerated channels list everything on this single page;
            # their ajax pages are empty and cannot be used.
            video_ids = self.extract_videos_from_page(channel_page)
        else:
            # Walk the json-based channel_ajax pages until the "load more"
            # marker disappears.
            video_ids = []
            pagenum = 1
            while True:
                page = self._download_json(
                    self._MORE_PAGES_URL % (pagenum, channel_id), channel_id,
                    note=u'Downloading page #%s' % pagenum,
                    transform_source=uppercase_escape)
                video_ids.extend(
                    self.extract_videos_from_page(page['content_html']))
                if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']:
                    break
                pagenum += 1

        self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))

        entries = []
        for video_id in video_ids:
            entries.append(
                self.url_result(video_id, 'Youtube', video_id=video_id))
        return self.playlist_result(entries, channel_id)
1549
1550
class YoutubeUserIE(InfoExtractor):
    """Extractor for a user's uploads, addressed by URL or ``ytuser:`` keyword.

    The uploads are read page by page from the GData feed named in
    ``_GDATA_URL`` and exposed lazily through a ``PagedList``.
    """
    IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)([A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/users/%s'
    _GDATA_PAGE_SIZE = 50
    _GDATA_URL = 'https://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'
    IE_NAME = u'youtube:user'

    @classmethod
    def suitable(cls, url):
        """Return True only if no other ``*IE`` class in this module accepts ``url``."""
        # Don't return True if the url can be extracted with another youtube
        # extractor: this regex is too permissive and would match as well.
        other_ies = iter(klass for (name, klass) in globals().items() if name.endswith('IE') and klass is not cls)
        if any(ie.suitable(url) for ie in other_ies):
            return False
        return super(YoutubeUserIE, cls).suitable(url)

    def _real_extract(self, url):
        """Return a playlist of the user's uploads, titled with the username.

        Raises ExtractorError when the URL does not match ``_VALID_URL`` or,
        while a page is being fetched, when the API response is not valid
        JSON.
        """
        # Extract username
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        username = mobj.group(1)

        # Download video ids using YouTube Data API. Result size per
        # query is limited (currently to 50 videos) so we need to query
        # page by page until there are no video ids - it means we got
        # all of them.

        def download_page(pagenum):
            """Yield url entries for the zero-based page ``pagenum``."""
            # The API's start-index is 1-based
            start_index = pagenum * self._GDATA_PAGE_SIZE + 1

            gdata_url = self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index)
            # The last index of the page is inclusive, hence the "- 1"
            page = self._download_webpage(
                gdata_url, username,
                u'Downloading video ids from %d to %d' % (
                    start_index, start_index + self._GDATA_PAGE_SIZE - 1))

            try:
                response = json.loads(page)
            except ValueError as err:
                raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))
            if 'entry' not in response['feed']:
                # No entries on this page: nothing to yield
                return

            # Extract video identifiers
            entries = response['feed']['entry']
            for entry in entries:
                title = entry['title']['$t']
                # The video id is the last path component of the entry id
                video_id = entry['id']['$t'].split('/')[-1]
                yield {
                    '_type': 'url',
                    'url': video_id,
                    'ie_key': 'Youtube',
                    'id': video_id,
                    'title': title,
                }
        url_results = PagedList(download_page, self._GDATA_PAGE_SIZE)

        return self.playlist_result(url_results, playlist_title=username)
1611
1612
class YoutubeSearchIE(SearchInfoExtractor):
    """Search extractor for the ``ytsearch`` key, backed by the GData videos feed."""
    IE_DESC = u'YouTube.com searches'
    _API_URL = u'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc'
    _MAX_RESULTS = 1000
    IE_NAME = u'youtube:search'
    _SEARCH_KEY = 'ytsearch'

    def _get_n_results(self, query, n):
        """Return a playlist with at most ``n`` results for ``query``.

        Result pages of 50 items are requested until ``n`` ids, or the
        total reported by the API, have been covered. Raises ExtractorError
        when a response carries no 'items'.
        """
        PAGE_SIZE = 50
        collected = []
        wanted = n
        page_index = 0

        while PAGE_SIZE * page_index < wanted:
            # start-index is 1-based
            first_item = PAGE_SIZE * page_index + 1
            api_url = self._API_URL % (
                compat_urllib_parse.quote_plus(query.encode('utf-8')),
                first_item)
            raw_json = self._download_webpage(
                api_url, video_id=u'query "%s"' % query,
                note=u'Downloading page %s' % (page_index + 1),
                errnote=u'Unable to download API page')
            api_response = json.loads(raw_json)['data']

            if 'items' not in api_response:
                raise ExtractorError(
                    u'[youtube] No video results', expected=True)

            collected.extend([item['id'] for item in api_response['items']])

            # Never ask for more than the API says exists
            wanted = min(n, api_response['totalItems'])
            page_index += 1

        # The last page may overshoot the requested amount
        if len(collected) > n:
            collected = collected[:n]
        entries = []
        for video_id in collected:
            entries.append(
                self.url_result(video_id, 'Youtube', video_id=video_id))
        return self.playlist_result(entries, query)
1654
1655
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Variant of YoutubeSearchIE whose API URL adds ``orderby=published``."""
    IE_DESC = u'YouTube.com searches, newest videos first'
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    _SEARCH_KEY = 'ytsearchdate'
    # Same feed as the parent class, with the extra ordering parameter
    _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc&orderby=published'
1661
1662
class YoutubeSearchURLIE(InfoExtractor):
    """Extractor for ``youtube.com/results?search_query=...`` pages."""
    IE_DESC = u'YouTube.com search URLs'
    IE_NAME = u'youtube:search_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?search_query=(?P<query>[^&]+)(?:[&]|$)'

    def _real_extract(self, url):
        """Return the results listed on the search page as a playlist.

        The playlist title is the decoded search query.
        """
        raw_query = re.match(self._VALID_URL, url).group('query')
        query = compat_urllib_parse.unquote_plus(raw_query)

        webpage = self._download_webpage(url, query)
        # Only the ordered result list is of interest
        result_code = self._search_regex(
            r'(?s)<ol id="search-results"(.*?)</ol>', webpage, u'result HTML')

        def build_entry(part_code):
            # One result heading -> one url entry; a missing title is tolerated
            part_title = self._html_search_regex(
                r'(?s)title="([^"]+)"', part_code, 'item title', fatal=False)
            part_url_snippet = self._html_search_regex(
                r'(?s)href="([^"]+)"', part_code, 'item URL')
            return {
                '_type': 'url',
                'url': compat_urlparse.urljoin(
                    'https://www.youtube.com/', part_url_snippet),
                'title': part_title,
            }

        headings = re.findall(
            r'(?s)<h3 class="yt-lockup-title">(.*?)</h3>', result_code)
        entries = [build_entry(heading) for heading in headings]

        return {
            '_type': 'playlist',
            'entries': entries,
            'title': query,
        }
1697
1698
class YoutubeShowIE(InfoExtractor):
    """Extractor for ``/show/`` pages; each season is a separate playlist."""
    IE_DESC = u'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://www\.youtube\.com/show/(.*)'
    IE_NAME = u'youtube:show'

    def _real_extract(self, url):
        """Return one YoutubePlaylist url_result per playlist link on the show page."""
        show_name = re.match(self._VALID_URL, url).group(1)
        webpage = self._download_webpage(url, show_name, u'Downloading show webpage')

        # Every season of the show has its own playlist link
        season_paths = [
            match.group(1)
            for match in re.finditer(r'href="(/playlist\?list=.*?)"', webpage)]
        self.to_screen(u'%s: Found %s seasons' % (show_name, len(season_paths)))

        results = []
        for path in season_paths:
            results.append(
                self.url_result('https://www.youtube.com' + path, 'YoutubePlaylist'))
        return results
1712
1713
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Common implementation for extractors that read their videos from
    http://www.youtube.com/feed_ajax
    Concrete subclasses have to provide _FEED_NAME and _PLAYLIST_TITLE.
    """
    _LOGIN_REQUIRED = True
    # When True, action_load_personal_feed is requested instead of
    # action_load_system_feed
    _PERSONAL_FEED = False

    @property
    def _FEED_TEMPLATE(self):
        """URL template of the feed; the remaining ``%s`` is the paging value."""
        if self._PERSONAL_FEED:
            action = 'action_load_personal_feed'
        else:
            action = 'action_load_system_feed'
        return 'https://www.youtube.com/feed_ajax?%s=1&feed_name=%s&paging=%%s' % (action, self._FEED_NAME)

    @property
    def IE_NAME(self):
        """Extractor name derived from the feed name."""
        return u'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        """Run the login step before any extraction."""
        self._login()

    def _real_extract(self, url):
        """Page through the feed and return all linked videos as a playlist."""
        feed_entries = []
        paging = 0
        page_number = 1
        while True:
            info = self._download_json(
                self._FEED_TEMPLATE % paging,
                u'%s feed' % self._FEED_NAME,
                u'Downloading page %s' % page_number)
            # The markup is delivered under one of two keys
            feed_html = info.get('feed_html') or info.get('content_html')
            found = re.finditer(r'"/watch\?v=(.*?)["&]', feed_html)
            for video_id in orderedSet(m.group(1) for m in found):
                feed_entries.append(
                    self.url_result(video_id, 'Youtube', video_id=video_id))
            # A paging value of None marks the last page
            paging = info['paging']
            if paging is None:
                break
            page_number += 1
        return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE)
1755
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor bound to the 'subscriptions' feed."""
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = u'Youtube Subscriptions'
    IE_DESC = u'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
    # Matches the feed URL as well as the ":ytsubs" / ":ytsubscriptions" keywords
    _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
1760     _PLAYLIST_TITLE = u'Youtube Subscriptions'
1761
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor bound to the 'recommended' feed."""
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = u'Youtube Recommended videos'
    IE_DESC = u'YouTube.com recommended videos, "ytrec" keyword (requires authentication)'
    # Matches the feed URL as well as the ":ytrec" / ":ytrecommended" keywords
    _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?'
1766     _PLAYLIST_TITLE = u'Youtube Recommended videos'
1767
class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor):
    """Feed extractor bound to the 'watch_later' feed, loaded as a personal feed."""
    _FEED_NAME = 'watch_later'
    _PERSONAL_FEED = True
    _PLAYLIST_TITLE = u'Youtube Watch Later'
    IE_DESC = u'Youtube watch later list, "ytwatchlater" keyword (requires authentication)'
    # Matches the feed URL as well as the ":ytwatchlater" keyword
    _VALID_URL = r'https?://www\.youtube\.com/feed/watch_later|:ytwatchlater'
1774
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor bound to the 'history' feed, loaded as a personal feed."""
    IE_DESC = u'Youtube watch history, "ythistory" keyword (requires authentication)'
    # Raw string, like the sibling feed extractors: '\.' is meant for the
    # regex engine and is not a valid escape in a plain string literal.
    _VALID_URL = r'https?://www\.youtube\.com/feed/history|:ythistory'
    _FEED_NAME = 'history'
    _PERSONAL_FEED = True
    _PLAYLIST_TITLE = u'Youtube Watch History'
1781
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Extractor that resolves the favourites page to its backing playlist."""
    IE_NAME = u'youtube:favorites'
    IE_DESC = u'YouTube.com favourite videos, "ytfav" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
    _LOGIN_REQUIRED = True

    def _real_extract(self, url):
        """Return a YoutubePlaylist url_result for the first ``list=`` id on the page."""
        favourites_page = self._download_webpage(
            'https://www.youtube.com/my_favorites',
            'Youtube Favourites videos')
        # The playlist id ends at the next quote or ampersand
        favourites_list_id = self._search_regex(
            r'list=(.+?)["&]', favourites_page, u'favourites playlist id')
        return self.url_result(favourites_list_id, 'YoutubePlaylist')
1792
1793
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch watch/attribution URLs that lost their parameters and explain why."""
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?[^/]+/watch\?(?:feature=[a-z_]+)?$|
        (?:https?://)?(?:www\.)?youtube\.com/attribution_link\?a=[^&]+$
    '''

    def _real_extract(self, url):
        """Always raise an expected ExtractorError with a shell-quoting hint."""
        hint = (
            u'Did you forget to quote the URL? Remember that & is a meta '
            u'character in most shells, so you want to put the URL in quotes, '
            u'like  youtube-dl '
            u'"http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            u' or simply  youtube-dl BaW_jenozKc  .')
        raise ExtractorError(hint, expected=True)