Merge pull request #1531 from rg3/no-playlist
[youtube-dl] / youtube_dl / extractor / youtube.py
1 # coding: utf-8
2
3 import collections
4 import errno
5 import io
6 import itertools
7 import json
8 import os.path
9 import re
10 import socket
11 import string
12 import struct
13 import traceback
14 import xml.etree.ElementTree
15 import zlib
16
17 from .common import InfoExtractor, SearchInfoExtractor
18 from .subtitles import SubtitlesInfoExtractor
19 from ..utils import (
20     compat_chr,
21     compat_http_client,
22     compat_parse_qs,
23     compat_urllib_error,
24     compat_urllib_parse,
25     compat_urllib_request,
26     compat_urlparse,
27     compat_str,
28
29     clean_html,
30     get_element_by_id,
31     ExtractorError,
32     unescapeHTML,
33     unified_strdate,
34     orderedSet,
35     write_json_file,
36 )
37
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors.

    _real_initialize() runs the shared start-up sequence: set the site
    language, log in (when credentials are available) and, only after a
    successful login, confirm the age gate.
    """
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _LANG_URL = r'https://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
    _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    def report_lang(self):
        """Report attempt to set language."""
        self.to_screen(u'Setting language')

    def _set_language(self):
        """Request _LANG_URL so the site answers in English.

        Returns True on success; on a network error a warning is reported
        through the downloader and False is returned.
        """
        request = compat_urllib_request.Request(self._LANG_URL)
        try:
            self.report_lang()
            compat_urllib_request.urlopen(request).read()
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self._downloader.report_warning(u'unable to set language: %s' % compat_str(err))
            return False
        return True

    def _login(self):
        """Log in with the credentials from _get_login_info().

        Returns True when the login succeeded and False when no credentials
        are configured or the login failed (a warning is reported in the
        failure case).

        Raises ExtractorError when no credentials are available and
        _LOGIN_REQUIRED is set.
        """
        (username, password) = self._get_login_info()
        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED:
                raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            return False

        request = compat_urllib_request.Request(self._LOGIN_URL)
        try:
            login_page = compat_urllib_request.urlopen(request).read().decode('utf-8')
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self._downloader.report_warning(u'unable to fetch login page: %s' % compat_str(err))
            return False

        # Hidden form tokens that have to be echoed back with the credentials
        galx = None
        dsh = None
        match = re.search(r'<input.+?name="GALX".+?value="(.+?)"', login_page, re.DOTALL)
        if match:
            galx = match.group(1)
        match = re.search(r'<input.+?name="dsh".+?value="(.+?)"', login_page, re.DOTALL)
        if match:
            dsh = match.group(1)

        # Log in
        login_form_strs = {
                u'continue': u'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
                u'Email': username,
                u'GALX': galx,
                u'Passwd': password,
                u'PersistentCookie': u'yes',
                u'_utf8': u'霱',
                u'bgresponse': u'js_disabled',
                u'checkConnection': u'',
                u'checkedDomains': u'youtube',
                u'dnConn': u'',
                u'dsh': dsh,
                u'pstMsg': u'0',
                u'rmShown': u'1',
                u'secTok': u'',
                u'signIn': u'Sign in',
                u'timeStmp': u'',
                u'service': u'youtube',
                u'uilel': u'3',
                u'hl': u'en_US',
        }
        # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
        # chokes on unicode.  Fields whose value could not be determined
        # (e.g. a token missing from the login page) are left out instead of
        # crashing on None.encode().
        login_form = dict(
            (k.encode('utf-8'), v.encode('utf-8'))
            for k, v in login_form_strs.items()
            if v is not None)
        login_data = compat_urllib_parse.urlencode(login_form).encode('ascii')
        request = compat_urllib_request.Request(self._LOGIN_URL, login_data)
        try:
            self.report_login()
            login_results = compat_urllib_request.urlopen(request).read().decode('utf-8')
            # Being served the login form again means the credentials were rejected
            if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
                self._downloader.report_warning(u'unable to log in: bad username or password')
                return False
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self._downloader.report_warning(u'unable to log in: %s' % compat_str(err))
            return False
        return True

    def _confirm_age(self):
        """Submit the age confirmation form to _AGE_URL.

        Returns True on success and raises ExtractorError on a network
        error.
        """
        age_form = {
                'next_url':     '/',
                'action_confirm':   'Confirm',
                }
        # POST bodies must be bytes on Python 3; urlencode() returns text there
        age_data = compat_urllib_parse.urlencode(age_form).encode('ascii')
        request = compat_urllib_request.Request(self._AGE_URL, age_data)
        try:
            self.report_age_confirmation()
            # The response body is not needed, only the request itself
            compat_urllib_request.urlopen(request).read()
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            raise ExtractorError(u'Unable to confirm age: %s' % compat_str(err))
        return True

    def _real_initialize(self):
        """Set the language, log in and confirm age, stopping at the first step that fails."""
        if self._downloader is None:
            return
        if not self._set_language():
            return
        if not self._login():
            return
        self._confirm_age()
142             return
143         self._confirm_age()
144
145
146 class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
    # Human-readable description of this extractor
    IE_DESC = u'YouTube.com'
    # Verbose-mode pattern (suitable() matches it with re.VERBOSE, so the
    # whitespace and the "#" comments inside the literal are part of the
    # pattern source but ignored by the regex engine).  Group 1 is the
    # optional URL prefix, group 2 the 11-character video ID.
    _VALID_URL = r"""^
                     (
                         (?:https?://)?                                       # http(s):// (optional)
                         (?:(?:(?:(?:\w+\.)?youtube(?:-nocookie)?\.com/|
                            tube\.majestyc\.net/|
                            youtube\.googleapis\.com/)                        # the various hostnames, with wildcard subdomains
                         (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
                         (?:                                                  # the various things that can precede the ID:
                             (?:(?:v|embed|e)/)                               # v/ or embed/ or e/
                             |(?:                                             # or the v= param in all its forms
                                 (?:(?:watch|movie)(?:_popup)?(?:\.php)?)?    # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
                                 (?:\?|\#!?)                                  # the params delimiter ? or # or #!
                                 (?:.*?&)?                                    # any other preceding param (like /?s=tuff&v=xxxx)
                                 v=
                             )
                         ))
                         |youtu\.be/                                          # just youtu.be/xxxx
                         )
                     )?                                                       # all until now is optional -> you can pass the naked ID
                     ([0-9A-Za-z_-]{11})                                      # here is it! the YouTube video ID
                     (?(1).+)?                                                # if we found the ID, everything can follow
                     $"""
    # Captures the value of a "next_url" query parameter
    _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
    # Format codes (itags, as strings) listed in order of quality, best first
    # within each group
    _available_formats = ['38', '37', '46', '22', '45', '35', '44', '34', '18', '43', '6', '5', '36', '17', '13',
                          # Apple HTTP Live Streaming
                          '96', '95', '94', '93', '92', '132', '151',
                          # 3D
                          '85', '84', '102', '83', '101', '82', '100',
                          # Dash video
                          '138', '137', '248', '136', '247', '135', '246',
                          '245', '244', '134', '243', '133', '242', '160',
                          # Dash audio
                          '141', '172', '140', '171', '139',
                          ]
    # Same codes as above, but with the webm variant placed ahead of its
    # neighbouring mp4/flv one (e.g. '46' before '37'); presumably used when
    # the caller prefers free formats - the consumer is not visible here.
    _available_formats_prefer_free = ['38', '46', '37', '45', '22', '44', '35', '43', '34', '18', '6', '5', '36', '17', '13',
                                      # Apple HTTP Live Streaming
                                      '96', '95', '94', '93', '92', '132', '151',
                                      # 3D
                                      '85', '102', '84', '101', '83', '100', '82',
                                      # Dash video
                                      '138', '248', '137', '247', '136', '246', '245',
                                      '244', '135', '243', '134', '242', '133', '160',
                                      # Dash audio
                                      '172', '141', '171', '140', '139',
                                      ]
    # Container name -> format codes delivered in that container, each list
    # following the order of _available_formats.  Only the first group of
    # _available_formats (no HLS, 3D or DASH codes) is covered.
    _video_formats_map = {
        'flv': ['35', '34', '6', '5'],
        '3gp': ['36', '17', '13'],
        'mp4': ['38', '37', '22', '18'],
        'webm': ['46', '45', '44', '43'],
    }
    # Format code -> file extension.  Codes '5', '6', '34' and '35' (the flv
    # entries of _video_formats_map) have no entry here; how callers handle a
    # missing key is not visible in this part of the file.
    _video_extensions = {
        '13': '3gp',
        '17': '3gp',
        '18': 'mp4',
        '22': 'mp4',
        '36': '3gp',
        '37': 'mp4',
        '38': 'mp4',
        '43': 'webm',
        '44': 'webm',
        '45': 'webm',
        '46': 'webm',

        # 3d videos
        '82': 'mp4',
        '83': 'mp4',
        '84': 'mp4',
        '85': 'mp4',
        '100': 'webm',
        '101': 'webm',
        '102': 'webm',

        # Apple HTTP Live Streaming
        '92': 'mp4',
        '93': 'mp4',
        '94': 'mp4',
        '95': 'mp4',
        '96': 'mp4',
        '132': 'mp4',
        '151': 'mp4',

        # Dash mp4
        '133': 'mp4',
        '134': 'mp4',
        '135': 'mp4',
        '136': 'mp4',
        '137': 'mp4',
        '138': 'mp4',
        '139': 'mp4',
        '140': 'mp4',
        '141': 'mp4',
        '160': 'mp4',

        # Dash webm
        '171': 'webm',
        '172': 'webm',
        '242': 'webm',
        '243': 'webm',
        '244': 'webm',
        '245': 'webm',
        '246': 'webm',
        '247': 'webm',
        '248': 'webm',
    }
    # Format code -> display string describing the format.  Three notations
    # are mixed: 'AxB' pairs (apparently height x width, e.g. '720x1280'),
    # 'Np' heights, and 'Nk' values for the codes that _special_itags labels
    # 'DASH Audio' (presumably audio bitrates).  '???' marks an unknown size.
    _video_dimensions = {
        '5': '240x400',
        '6': '???',
        '13': '???',
        '17': '144x176',
        '18': '360x640',
        '22': '720x1280',
        '34': '360x640',
        '35': '480x854',
        '36': '240x320',
        '37': '1080x1920',
        '38': '3072x4096',
        '43': '360x640',
        '44': '480x854',
        '45': '720x1280',
        '46': '1080x1920',
        '82': '360p',
        '83': '480p',
        '84': '720p',
        '85': '1080p',
        '92': '240p',
        '93': '360p',
        '94': '480p',
        '95': '720p',
        '96': '1080p',
        '100': '360p',
        '101': '480p',
        '102': '720p',
        '132': '240p',
        '151': '72p',
        '133': '240p',
        '134': '360p',
        '135': '480p',
        '136': '720p',
        '137': '1080p',
        '138': '>1080p',
        '139': '48k',
        '140': '128k',
        '141': '256k',
        '160': '192p',
        '171': '128k',
        '172': '256k',
        '242': '240p',
        '243': '360p',
        '244': '480p',
        '245': '480p',
        '246': '480p',
        '247': '720p',
        '248': '1080p',
    }
    # Format code -> label for the formats that are not plain single-file
    # videos: the 3D variants and the DASH video-only / audio-only streams.
    # Codes without an entry carry no special label.
    _special_itags = {
        '82': '3D',
        '83': '3D',
        '84': '3D',
        '85': '3D',
        '100': '3D',
        '101': '3D',
        '102': '3D',
        '133': 'DASH Video',
        '134': 'DASH Video',
        '135': 'DASH Video',
        '136': 'DASH Video',
        '137': 'DASH Video',
        '138': 'DASH Video',
        '139': 'DASH Audio',
        '140': 'DASH Audio',
        '141': 'DASH Audio',
        '160': 'DASH Video',
        '171': 'DASH Audio',
        '172': 'DASH Audio',
        '242': 'DASH Video',
        '243': 'DASH Video',
        '244': 'DASH Video',
        '245': 'DASH Video',
        '246': 'DASH Video',
        '247': 'DASH Video',
        '248': 'DASH Video',
    }
332
    # Name of this extractor (also used in the "No login info available"
    # message of the base class)
    IE_NAME = u'youtube'
    # Download test cases: the URL to fetch, the expected output file name and
    # the metadata fields expected in the result.  "note" is a free-text
    # remark about what the case covers.  Descriptions starting with "md5:"
    # are presumably compared by hash rather than literally - the test
    # harness is not part of this file.
    _TESTS = [
        {
            u"url":  u"http://www.youtube.com/watch?v=BaW_jenozKc",
            u"file":  u"BaW_jenozKc.mp4",
            u"info_dict": {
                u"title": u"youtube-dl test video \"'/\\ä↭𝕐",
                u"uploader": u"Philipp Hagemeister",
                u"uploader_id": u"phihag",
                u"upload_date": u"20121002",
                u"description": u"test chars:  \"'/\\ä↭𝕐\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de ."
            }
        },
        {
            u"url":  u"http://www.youtube.com/watch?v=1ltcDfZMA3U",
            u"file":  u"1ltcDfZMA3U.flv",
            u"note": u"Test VEVO video (#897)",
            u"info_dict": {
                u"upload_date": u"20070518",
                u"title": u"Maps - It Will Find You",
                u"description": u"Music video by Maps performing It Will Find You.",
                u"uploader": u"MuteUSA",
                u"uploader_id": u"MuteUSA"
            }
        },
        {
            u"url":  u"http://www.youtube.com/watch?v=UxxajLWwzqY",
            u"file":  u"UxxajLWwzqY.mp4",
            u"note": u"Test generic use_cipher_signature video (#897)",
            u"info_dict": {
                u"upload_date": u"20120506",
                u"title": u"Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]",
                u"description": u"md5:5b292926389560516e384ac437c0ec07",
                u"uploader": u"Icona Pop",
                u"uploader_id": u"IconaPop"
            }
        },
        {
            u"url":  u"https://www.youtube.com/watch?v=07FYdnEawAQ",
            u"file":  u"07FYdnEawAQ.mp4",
            u"note": u"Test VEVO video with age protection (#956)",
            u"info_dict": {
                u"upload_date": u"20130703",
                u"title": u"Justin Timberlake - Tunnel Vision (Explicit)",
                u"description": u"md5:64249768eec3bc4276236606ea996373",
                u"uploader": u"justintimberlakeVEVO",
                u"uploader_id": u"justintimberlakeVEVO"
            }
        },
    ]
383
384
385     @classmethod
386     def suitable(cls, url):
387         """Receives a URL and returns True if suitable for this IE."""
388         if YoutubePlaylistIE.suitable(url): return False
389         return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
390
391     def __init__(self, *args, **kwargs):
392         super(YoutubeIE, self).__init__(*args, **kwargs)
393         self._player_cache = {}
394
395     def report_video_webpage_download(self, video_id):
396         """Report attempt to download video webpage."""
397         self.to_screen(u'%s: Downloading video webpage' % video_id)
398
399     def report_video_info_webpage_download(self, video_id):
400         """Report attempt to download video info webpage."""
401         self.to_screen(u'%s: Downloading video info webpage' % video_id)
402
403     def report_information_extraction(self, video_id):
404         """Report attempt to extract video information."""
405         self.to_screen(u'%s: Extracting video information' % video_id)
406
407     def report_unavailable_format(self, video_id, format):
408         """Report extracted video URL."""
409         self.to_screen(u'%s: Format %s not available' % (video_id, format))
410
411     def report_rtmp_download(self):
412         """Indicate the download will use the RTMP protocol."""
413         self.to_screen(u'RTMP download detected')
414
415     def _extract_signature_function(self, video_id, player_url, slen):
416         id_m = re.match(r'.*-(?P<id>[a-zA-Z0-9_-]+)\.(?P<ext>[a-z]+)$',
417                         player_url)
418         player_type = id_m.group('ext')
419         player_id = id_m.group('id')
420
421         # Read from filesystem cache
422         func_id = '%s_%s_%d' % (player_type, player_id, slen)
423         assert os.path.basename(func_id) == func_id
424         cache_dir = self._downloader.params.get('cachedir',
425                                                 u'~/.youtube-dl/cache')
426
427         cache_enabled = cache_dir is not None
428         if cache_enabled:
429             cache_fn = os.path.join(os.path.expanduser(cache_dir),
430                                     u'youtube-sigfuncs',
431                                     func_id + '.json')
432             try:
433                 with io.open(cache_fn, 'r', encoding='utf-8') as cachef:
434                     cache_spec = json.load(cachef)
435                 return lambda s: u''.join(s[i] for i in cache_spec)
436             except IOError:
437                 pass  # No cache available
438
439         if player_type == 'js':
440             code = self._download_webpage(
441                 player_url, video_id,
442                 note=u'Downloading %s player %s' % (player_type, player_id),
443                 errnote=u'Download of %s failed' % player_url)
444             res = self._parse_sig_js(code)
445         elif player_type == 'swf':
446             urlh = self._request_webpage(
447                 player_url, video_id,
448                 note=u'Downloading %s player %s' % (player_type, player_id),
449                 errnote=u'Download of %s failed' % player_url)
450             code = urlh.read()
451             res = self._parse_sig_swf(code)
452         else:
453             assert False, 'Invalid player type %r' % player_type
454
455         if cache_enabled:
456             try:
457                 test_string = u''.join(map(compat_chr, range(slen)))
458                 cache_res = res(test_string)
459                 cache_spec = [ord(c) for c in cache_res]
460                 try:
461                     os.makedirs(os.path.dirname(cache_fn))
462                 except OSError as ose:
463                     if ose.errno != errno.EEXIST:
464                         raise
465                 write_json_file(cache_spec, cache_fn)
466             except Exception:
467                 tb = traceback.format_exc()
468                 self._downloader.report_warning(
469                     u'Writing cache to %r failed: %s' % (cache_fn, tb))
470
471         return res
472
473     def _print_sig_code(self, func, slen):
474         def gen_sig_code(idxs):
475             def _genslice(start, end, step):
476                 starts = u'' if start == 0 else str(start)
477                 ends = (u':%d' % (end+step)) if end + step >= 0 else u':'
478                 steps = u'' if step == 1 else (u':%d' % step)
479                 return u's[%s%s%s]' % (starts, ends, steps)
480
481             step = None
482             start = '(Never used)'  # Quelch pyflakes warnings - start will be
483                                     # set as soon as step is set
484             for i, prev in zip(idxs[1:], idxs[:-1]):
485                 if step is not None:
486                     if i - prev == step:
487                         continue
488                     yield _genslice(start, prev, step)
489                     step = None
490                     continue
491                 if i - prev in [-1, 1]:
492                     step = i - prev
493                     start = prev
494                     continue
495                 else:
496                     yield u's[%d]' % prev
497             if step is None:
498                 yield u's[%d]' % i
499             else:
500                 yield _genslice(start, i, step)
501
502         test_string = u''.join(map(compat_chr, range(slen)))
503         cache_res = func(test_string)
504         cache_spec = [ord(c) for c in cache_res]
505         expr_code = u' + '.join(gen_sig_code(cache_spec))
506         code = u'if len(s) == %d:\n    return %s\n' % (slen, expr_code)
507         self.to_screen(u'Extracted signature function:\n' + code)
508
509     def _parse_sig_js(self, jscode):
510         funcname = self._search_regex(
511             r'signature=([a-zA-Z]+)', jscode,
512             u'Initial JS player signature function name')
513
514         functions = {}
515
516         def argidx(varname):
517             return string.lowercase.index(varname)
518
519         def interpret_statement(stmt, local_vars, allow_recursion=20):
520             if allow_recursion < 0:
521                 raise ExtractorError(u'Recursion limit reached')
522
523             if stmt.startswith(u'var '):
524                 stmt = stmt[len(u'var '):]
525             ass_m = re.match(r'^(?P<out>[a-z]+)(?:\[(?P<index>[^\]]+)\])?' +
526                              r'=(?P<expr>.*)$', stmt)
527             if ass_m:
528                 if ass_m.groupdict().get('index'):
529                     def assign(val):
530                         lvar = local_vars[ass_m.group('out')]
531                         idx = interpret_expression(ass_m.group('index'),
532                                                    local_vars, allow_recursion)
533                         assert isinstance(idx, int)
534                         lvar[idx] = val
535                         return val
536                     expr = ass_m.group('expr')
537                 else:
538                     def assign(val):
539                         local_vars[ass_m.group('out')] = val
540                         return val
541                     expr = ass_m.group('expr')
542             elif stmt.startswith(u'return '):
543                 assign = lambda v: v
544                 expr = stmt[len(u'return '):]
545             else:
546                 raise ExtractorError(
547                     u'Cannot determine left side of statement in %r' % stmt)
548
549             v = interpret_expression(expr, local_vars, allow_recursion)
550             return assign(v)
551
552         def interpret_expression(expr, local_vars, allow_recursion):
553             if expr.isdigit():
554                 return int(expr)
555
556             if expr.isalpha():
557                 return local_vars[expr]
558
559             m = re.match(r'^(?P<in>[a-z]+)\.(?P<member>.*)$', expr)
560             if m:
561                 member = m.group('member')
562                 val = local_vars[m.group('in')]
563                 if member == 'split("")':
564                     return list(val)
565                 if member == 'join("")':
566                     return u''.join(val)
567                 if member == 'length':
568                     return len(val)
569                 if member == 'reverse()':
570                     return val[::-1]
571                 slice_m = re.match(r'slice\((?P<idx>.*)\)', member)
572                 if slice_m:
573                     idx = interpret_expression(
574                         slice_m.group('idx'), local_vars, allow_recursion-1)
575                     return val[idx:]
576
577             m = re.match(
578                 r'^(?P<in>[a-z]+)\[(?P<idx>.+)\]$', expr)
579             if m:
580                 val = local_vars[m.group('in')]
581                 idx = interpret_expression(m.group('idx'), local_vars,
582                                            allow_recursion-1)
583                 return val[idx]
584
585             m = re.match(r'^(?P<a>.+?)(?P<op>[%])(?P<b>.+?)$', expr)
586             if m:
587                 a = interpret_expression(m.group('a'),
588                                          local_vars, allow_recursion)
589                 b = interpret_expression(m.group('b'),
590                                          local_vars, allow_recursion)
591                 return a % b
592
593             m = re.match(
594                 r'^(?P<func>[a-zA-Z]+)\((?P<args>[a-z0-9,]+)\)$', expr)
595             if m:
596                 fname = m.group('func')
597                 if fname not in functions:
598                     functions[fname] = extract_function(fname)
599                 argvals = [int(v) if v.isdigit() else local_vars[v]
600                            for v in m.group('args').split(',')]
601                 return functions[fname](argvals)
602             raise ExtractorError(u'Unsupported JS expression %r' % expr)
603
604         def extract_function(funcname):
605             func_m = re.search(
606                 r'function ' + re.escape(funcname) +
607                 r'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}',
608                 jscode)
609             argnames = func_m.group('args').split(',')
610
611             def resf(args):
612                 local_vars = dict(zip(argnames, args))
613                 for stmt in func_m.group('code').split(';'):
614                     res = interpret_statement(stmt, local_vars)
615                 return res
616             return resf
617
618         initial_function = extract_function(funcname)
619         return lambda s: initial_function([s])
620
621     def _parse_sig_swf(self, file_contents):
622         if file_contents[1:3] != b'WS':
623             raise ExtractorError(
624                 u'Not an SWF file; header is %r' % file_contents[:3])
625         if file_contents[:1] == b'C':
626             content = zlib.decompress(file_contents[8:])
627         else:
628             raise NotImplementedError(u'Unsupported compression format %r' %
629                                       file_contents[:1])
630
631         def extract_tags(content):
632             pos = 0
633             while pos < len(content):
634                 header16 = struct.unpack('<H', content[pos:pos+2])[0]
635                 pos += 2
636                 tag_code = header16 >> 6
637                 tag_len = header16 & 0x3f
638                 if tag_len == 0x3f:
639                     tag_len = struct.unpack('<I', content[pos:pos+4])[0]
640                     pos += 4
641                 assert pos+tag_len <= len(content)
642                 yield (tag_code, content[pos:pos+tag_len])
643                 pos += tag_len
644
645         code_tag = next(tag
646                         for tag_code, tag in extract_tags(content)
647                         if tag_code == 82)
648         p = code_tag.index(b'\0', 4) + 1
649         code_reader = io.BytesIO(code_tag[p:])
650
651         # Parse ABC (AVM2 ByteCode)
652         def read_int(reader=None):
653             if reader is None:
654                 reader = code_reader
655             res = 0
656             shift = 0
657             for _ in range(5):
658                 buf = reader.read(1)
659                 assert len(buf) == 1
660                 b = struct.unpack('<B', buf)[0]
661                 res = res | ((b & 0x7f) << shift)
662                 if b & 0x80 == 0:
663                     break
664                 shift += 7
665             return res
666
667         def u30(reader=None):
668             res = read_int(reader)
669             assert res & 0xf0000000 == 0
670             return res
671         u32 = read_int
672
673         def s32(reader=None):
674             v = read_int(reader)
675             if v & 0x80000000 != 0:
676                 v = - ((v ^ 0xffffffff) + 1)
677             return v
678
679         def read_string(reader=None):
680             if reader is None:
681                 reader = code_reader
682             slen = u30(reader)
683             resb = reader.read(slen)
684             assert len(resb) == slen
685             return resb.decode('utf-8')
686
687         def read_bytes(count, reader=None):
688             if reader is None:
689                 reader = code_reader
690             resb = reader.read(count)
691             assert len(resb) == count
692             return resb
693
694         def read_byte(reader=None):
695             resb = read_bytes(1, reader=reader)
696             res = struct.unpack('<B', resb)[0]
697             return res
698
699         # minor_version + major_version
700         read_bytes(2 + 2)
701
702         # Constant pool
703         int_count = u30()
704         for _c in range(1, int_count):
705             s32()
706         uint_count = u30()
707         for _c in range(1, uint_count):
708             u32()
709         double_count = u30()
710         read_bytes((double_count-1) * 8)
711         string_count = u30()
712         constant_strings = [u'']
713         for _c in range(1, string_count):
714             s = read_string()
715             constant_strings.append(s)
716         namespace_count = u30()
717         for _c in range(1, namespace_count):
718             read_bytes(1)  # kind
719             u30()  # name
720         ns_set_count = u30()
721         for _c in range(1, ns_set_count):
722             count = u30()
723             for _c2 in range(count):
724                 u30()
725         multiname_count = u30()
726         MULTINAME_SIZES = {
727             0x07: 2,  # QName
728             0x0d: 2,  # QNameA
729             0x0f: 1,  # RTQName
730             0x10: 1,  # RTQNameA
731             0x11: 0,  # RTQNameL
732             0x12: 0,  # RTQNameLA
733             0x09: 2,  # Multiname
734             0x0e: 2,  # MultinameA
735             0x1b: 1,  # MultinameL
736             0x1c: 1,  # MultinameLA
737         }
738         multinames = [u'']
739         for _c in range(1, multiname_count):
740             kind = u30()
741             assert kind in MULTINAME_SIZES, u'Invalid multiname kind %r' % kind
742             if kind == 0x07:
743                 u30()  # namespace_idx
744                 name_idx = u30()
745                 multinames.append(constant_strings[name_idx])
746             else:
747                 multinames.append('[MULTINAME kind: %d]' % kind)
748                 for _c2 in range(MULTINAME_SIZES[kind]):
749                     u30()
750
751         # Methods
752         method_count = u30()
753         MethodInfo = collections.namedtuple(
754             'MethodInfo',
755             ['NEED_ARGUMENTS', 'NEED_REST'])
756         method_infos = []
757         for method_id in range(method_count):
758             param_count = u30()
759             u30()  # return type
760             for _ in range(param_count):
761                 u30()  # param type
762             u30()  # name index (always 0 for youtube)
763             flags = read_byte()
764             if flags & 0x08 != 0:
765                 # Options present
766                 option_count = u30()
767                 for c in range(option_count):
768                     u30()  # val
769                     read_bytes(1)  # kind
770             if flags & 0x80 != 0:
771                 # Param names present
772                 for _ in range(param_count):
773                     u30()  # param name
774             mi = MethodInfo(flags & 0x01 != 0, flags & 0x04 != 0)
775             method_infos.append(mi)
776
777         # Metadata
778         metadata_count = u30()
779         for _c in range(metadata_count):
780             u30()  # name
781             item_count = u30()
782             for _c2 in range(item_count):
783                 u30()  # key
784                 u30()  # value
785
786         def parse_traits_info():
787             trait_name_idx = u30()
788             kind_full = read_byte()
789             kind = kind_full & 0x0f
790             attrs = kind_full >> 4
791             methods = {}
792             if kind in [0x00, 0x06]:  # Slot or Const
793                 u30()  # Slot id
794                 u30()  # type_name_idx
795                 vindex = u30()
796                 if vindex != 0:
797                     read_byte()  # vkind
798             elif kind in [0x01, 0x02, 0x03]:  # Method / Getter / Setter
799                 u30()  # disp_id
800                 method_idx = u30()
801                 methods[multinames[trait_name_idx]] = method_idx
802             elif kind == 0x04:  # Class
803                 u30()  # slot_id
804                 u30()  # classi
805             elif kind == 0x05:  # Function
806                 u30()  # slot_id
807                 function_idx = u30()
808                 methods[function_idx] = multinames[trait_name_idx]
809             else:
810                 raise ExtractorError(u'Unsupported trait kind %d' % kind)
811
812             if attrs & 0x4 != 0:  # Metadata present
813                 metadata_count = u30()
814                 for _c3 in range(metadata_count):
815                     u30()  # metadata index
816
817             return methods
818
819         # Classes
820         TARGET_CLASSNAME = u'SignatureDecipher'
821         searched_idx = multinames.index(TARGET_CLASSNAME)
822         searched_class_id = None
823         class_count = u30()
824         for class_id in range(class_count):
825             name_idx = u30()
826             if name_idx == searched_idx:
827                 # We found the class we're looking for!
828                 searched_class_id = class_id
829             u30()  # super_name idx
830             flags = read_byte()
831             if flags & 0x08 != 0:  # Protected namespace is present
832                 u30()  # protected_ns_idx
833             intrf_count = u30()
834             for _c2 in range(intrf_count):
835                 u30()
836             u30()  # iinit
837             trait_count = u30()
838             for _c2 in range(trait_count):
839                 parse_traits_info()
840
841         if searched_class_id is None:
842             raise ExtractorError(u'Target class %r not found' %
843                                  TARGET_CLASSNAME)
844
845         method_names = {}
846         method_idxs = {}
847         for class_id in range(class_count):
848             u30()  # cinit
849             trait_count = u30()
850             for _c2 in range(trait_count):
851                 trait_methods = parse_traits_info()
852                 if class_id == searched_class_id:
853                     method_names.update(trait_methods.items())
854                     method_idxs.update(dict(
855                         (idx, name)
856                         for name, idx in trait_methods.items()))
857
858         # Scripts
859         script_count = u30()
860         for _c in range(script_count):
861             u30()  # init
862             trait_count = u30()
863             for _c2 in range(trait_count):
864                 parse_traits_info()
865
866         # Method bodies
867         method_body_count = u30()
868         Method = collections.namedtuple('Method', ['code', 'local_count'])
869         methods = {}
870         for _c in range(method_body_count):
871             method_idx = u30()
872             u30()  # max_stack
873             local_count = u30()
874             u30()  # init_scope_depth
875             u30()  # max_scope_depth
876             code_length = u30()
877             code = read_bytes(code_length)
878             if method_idx in method_idxs:
879                 m = Method(code, local_count)
880                 methods[method_idxs[method_idx]] = m
881             exception_count = u30()
882             for _c2 in range(exception_count):
883                 u30()  # from
884                 u30()  # to
885                 u30()  # target
886                 u30()  # exc_type
887                 u30()  # var_name
888             trait_count = u30()
889             for _c2 in range(trait_count):
890                 parse_traits_info()
891
892         assert p + code_reader.tell() == len(code_tag)
893         assert len(methods) == len(method_idxs)
894
895         method_pyfunctions = {}
896
        def extract_function(func_name):
            """Return a Python callable that emulates the method *func_name*.

            The callable interprets the bytecode stored in
            ``methods[func_name]`` and takes a single argument: the list of
            call arguments.  Results are memoized in ``method_pyfunctions``.

            Raises ExtractorError if no body was collected for *func_name*.
            """
            if func_name in method_pyfunctions:
                return method_pyfunctions[func_name]
            if func_name not in methods:
                raise ExtractorError(u'Cannot find function %r' % func_name)
            m = methods[func_name]

            def resfunc(args):
                """Run the bytecode of ``m`` on *args* and return its result.

                Only the opcodes and properties handled below are supported;
                anything else raises NotImplementedError.
                """
                # Register 0 is a placeholder for "this", followed by the call
                # arguments and then one empty slot per declared local.
                registers = ['(this)'] + list(args) + [None] * m.local_count
                stack = []
                coder = io.BytesIO(m.code)
                # NOTE(review): u30 is defined outside this view; here it is
                # handed the bytecode reader, presumably to decode one
                # variable-length integer operand from it - confirm.
                while True:
                    opcode = struct.unpack('!B', coder.read(1))[0]
                    if opcode == 36:  # pushbyte
                        v = struct.unpack('!B', coder.read(1))[0]
                        stack.append(v)
                    elif opcode == 44:  # pushstring
                        idx = u30(coder)
                        stack.append(constant_strings[idx])
                    elif opcode == 48:  # pushscope
                        # We don't implement the scope register, so we'll just
                        # ignore the popped value
                        stack.pop()
                    elif opcode == 70:  # callproperty
                        index = u30(coder)
                        mname = multinames[index]
                        arg_count = u30(coder)
                        # Arguments come off the stack last-first, hence the
                        # reversal.  This rebinds ``args``; the registers were
                        # already built from the original value.
                        args = list(reversed(
                            [stack.pop() for _ in range(arg_count)]))
                        obj = stack.pop()
                        if mname == u'split':
                            assert len(args) == 1
                            assert isinstance(args[0], compat_str)
                            assert isinstance(obj, compat_str)
                            # Python's str.split rejects an empty separator,
                            # so splitting on u'' is done with list()
                            if args[0] == u'':
                                res = list(obj)
                            else:
                                res = obj.split(args[0])
                            stack.append(res)
                        elif mname == u'slice':
                            assert len(args) == 1
                            assert isinstance(args[0], int)
                            assert isinstance(obj, list)
                            res = obj[args[0]:]
                            stack.append(res)
                        elif mname == u'join':
                            assert len(args) == 1
                            assert isinstance(args[0], compat_str)
                            assert isinstance(obj, list)
                            res = args[0].join(obj)
                            stack.append(res)
                        elif mname in method_pyfunctions:
                            # Only functions already built by extract_function
                            # (e.g. through findpropstrict) can be called here
                            stack.append(method_pyfunctions[mname](args))
                        else:
                            raise NotImplementedError(
                                u'Unsupported property %r on %r'
                                % (mname, obj))
                    elif opcode == 72:  # returnvalue
                        res = stack.pop()
                        return res
                    elif opcode == 79:  # callpropvoid
                        index = u30(coder)
                        mname = multinames[index]
                        arg_count = u30(coder)
                        args = list(reversed(
                            [stack.pop() for _ in range(arg_count)]))
                        obj = stack.pop()
                        if mname == u'reverse':
                            assert isinstance(obj, list)
                            # In place; nothing is pushed back on the stack
                            obj.reverse()
                        else:
                            raise NotImplementedError(
                                u'Unsupported (void) property %r on %r'
                                % (mname, obj))
                    elif opcode == 93:  # findpropstrict
                        index = u30(coder)
                        mname = multinames[index]
                        # Builds (and memoizes) the referenced function and
                        # pushes the resulting Python callable
                        res = extract_function(mname)
                        stack.append(res)
                    elif opcode == 97:  # setproperty
                        # The operand is read to advance the stream but is not
                        # used: the store is always treated as obj[idx] = value
                        index = u30(coder)
                        value = stack.pop()
                        idx = stack.pop()
                        obj = stack.pop()
                        assert isinstance(obj, list)
                        assert isinstance(idx, int)
                        obj[idx] = value
                    elif opcode == 98:  # getlocal
                        index = u30(coder)
                        stack.append(registers[index])
                    elif opcode == 99:  # setlocal
                        index = u30(coder)
                        value = stack.pop()
                        registers[index] = value
                    elif opcode == 102:  # getproperty
                        index = u30(coder)
                        pname = multinames[index]
                        if pname == u'length':
                            obj = stack.pop()
                            assert isinstance(obj, list)
                            stack.append(len(obj))
                        else:  # Assume attribute access
                            # Any other name is handled as list indexing with
                            # the index taken from the stack
                            idx = stack.pop()
                            assert isinstance(idx, int)
                            obj = stack.pop()
                            assert isinstance(obj, list)
                            stack.append(obj[idx])
                    elif opcode == 128:  # coerce
                        # Operand skipped; the value on the stack is untouched
                        u30(coder)
                    elif opcode == 133:  # coerce_s
                        # No conversion is done, the top of the stack is only
                        # checked to be None or a string already
                        assert isinstance(stack[-1], (type(None), compat_str))
                    elif opcode == 164:  # modulo
                        value2 = stack.pop()
                        value1 = stack.pop()
                        res = value1 % value2
                        stack.append(res)
                    elif opcode == 208:  # getlocal_0
                        stack.append(registers[0])
                    elif opcode == 209:  # getlocal_1
                        stack.append(registers[1])
                    elif opcode == 210:  # getlocal_2
                        stack.append(registers[2])
                    elif opcode == 211:  # getlocal_3
                        stack.append(registers[3])
                    elif opcode == 214:  # setlocal_2
                        registers[2] = stack.pop()
                    elif opcode == 215:  # setlocal_3
                        registers[3] = stack.pop()
                    else:
                        raise NotImplementedError(
                            u'Unsupported opcode %d' % opcode)

            # Memoize so later lookups of the same name reuse this callable
            method_pyfunctions[func_name] = resfunc
            return resfunc
1031
1032         initial_function = extract_function(u'decipher')
1033         return lambda s: initial_function([s])
1034
1035     def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
1036         """Turn the encrypted s field into a working signature"""
1037
1038         if player_url is not None:
1039             try:
1040                 if player_url not in self._player_cache:
1041                     func = self._extract_signature_function(
1042                         video_id, player_url, len(s)
1043                     )
1044                     self._player_cache[player_url] = func
1045                 func = self._player_cache[player_url]
1046                 if self._downloader.params.get('youtube_print_sig_code'):
1047                     self._print_sig_code(func, len(s))
1048                 return func(s)
1049             except Exception:
1050                 tb = traceback.format_exc()
1051                 self._downloader.report_warning(
1052                     u'Automatic signature extraction failed: ' + tb)
1053
1054             self._downloader.report_warning(
1055                 u'Warning: Falling back to static signature algorithm')
1056
1057         return self._static_decrypt_signature(
1058             s, video_id, player_url, age_gate)
1059
1060     def _static_decrypt_signature(self, s, video_id, player_url, age_gate):
1061         if age_gate:
1062             # The videos with age protection use another player, so the
1063             # algorithms can be different.
1064             if len(s) == 86:
1065                 return s[2:63] + s[82] + s[64:82] + s[63]
1066
1067         if len(s) == 93:
1068             return s[86:29:-1] + s[88] + s[28:5:-1]
1069         elif len(s) == 92:
1070             return s[25] + s[3:25] + s[0] + s[26:42] + s[79] + s[43:79] + s[91] + s[80:83]
1071         elif len(s) == 91:
1072             return s[84:27:-1] + s[86] + s[26:5:-1]
1073         elif len(s) == 90:
1074             return s[25] + s[3:25] + s[2] + s[26:40] + s[77] + s[41:77] + s[89] + s[78:81]
1075         elif len(s) == 89:
1076             return s[84:78:-1] + s[87] + s[77:60:-1] + s[0] + s[59:3:-1]
1077         elif len(s) == 88:
1078             return s[7:28] + s[87] + s[29:45] + s[55] + s[46:55] + s[2] + s[56:87] + s[28]
1079         elif len(s) == 87:
1080             return s[6:27] + s[4] + s[28:39] + s[27] + s[40:59] + s[2] + s[60:]
1081         elif len(s) == 86:
1082             return s[80:72:-1] + s[16] + s[71:39:-1] + s[72] + s[38:16:-1] + s[82] + s[15::-1]
1083         elif len(s) == 85:
1084             return s[3:11] + s[0] + s[12:55] + s[84] + s[56:84]
1085         elif len(s) == 84:
1086             return s[78:70:-1] + s[14] + s[69:37:-1] + s[70] + s[36:14:-1] + s[80] + s[:14][::-1]
1087         elif len(s) == 83:
1088             return s[80:63:-1] + s[0] + s[62:0:-1] + s[63]
1089         elif len(s) == 82:
1090             return s[80:73:-1] + s[81] + s[72:54:-1] + s[2] + s[53:43:-1] + s[0] + s[42:2:-1] + s[43] + s[1] + s[54]
1091         elif len(s) == 81:
1092             return s[56] + s[79:56:-1] + s[41] + s[55:41:-1] + s[80] + s[40:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
1093         elif len(s) == 80:
1094             return s[1:19] + s[0] + s[20:68] + s[19] + s[69:80]
1095         elif len(s) == 79:
1096             return s[54] + s[77:54:-1] + s[39] + s[53:39:-1] + s[78] + s[38:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
1097
1098         else:
1099             raise ExtractorError(u'Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s)))
1100
1101     def _get_available_subtitles(self, video_id):
1102         try:
1103             sub_list = self._download_webpage(
1104                 'http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
1105                 video_id, note=False)
1106         except ExtractorError as err:
1107             self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err))
1108             return {}
1109         lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list)
1110
1111         sub_lang_list = {}
1112         for l in lang_list:
1113             lang = l[1]
1114             params = compat_urllib_parse.urlencode({
1115                 'lang': lang,
1116                 'v': video_id,
1117                 'fmt': self._downloader.params.get('subtitlesformat'),
1118             })
1119             url = u'http://www.youtube.com/api/timedtext?' + params
1120             sub_lang_list[lang] = url
1121         if not sub_lang_list:
1122             self._downloader.report_warning(u'video doesn\'t have subtitles')
1123             return {}
1124         return sub_lang_list
1125
1126     def _get_available_automatic_caption(self, video_id, webpage):
1127         """We need the webpage for getting the captions url, pass it as an
1128            argument to speed up the process."""
1129         sub_format = self._downloader.params.get('subtitlesformat')
1130         self.to_screen(u'%s: Looking for automatic captions' % video_id)
1131         mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
1132         err_msg = u'Couldn\'t find automatic captions for %s' % video_id
1133         if mobj is None:
1134             self._downloader.report_warning(err_msg)
1135             return {}
1136         player_config = json.loads(mobj.group(1))
1137         try:
1138             args = player_config[u'args']
1139             caption_url = args[u'ttsurl']
1140             timestamp = args[u'timestamp']
1141             # We get the available subtitles
1142             list_params = compat_urllib_parse.urlencode({
1143                 'type': 'list',
1144                 'tlangs': 1,
1145                 'asrs': 1,
1146             })
1147             list_url = caption_url + '&' + list_params
1148             list_page = self._download_webpage(list_url, video_id)
1149             caption_list = xml.etree.ElementTree.fromstring(list_page.encode('utf-8'))
1150             original_lang_node = caption_list.find('track')
1151             if original_lang_node.attrib.get('kind') != 'asr' :
1152                 self._downloader.report_warning(u'Video doesn\'t have automatic captions')
1153                 return {}
1154             original_lang = original_lang_node.attrib['lang_code']
1155
1156             sub_lang_list = {}
1157             for lang_node in caption_list.findall('target'):
1158                 sub_lang = lang_node.attrib['lang_code']
1159                 params = compat_urllib_parse.urlencode({
1160                     'lang': original_lang,
1161                     'tlang': sub_lang,
1162                     'fmt': sub_format,
1163                     'ts': timestamp,
1164                     'kind': 'asr',
1165                 })
1166                 sub_lang_list[sub_lang] = caption_url + '&' + params
1167             return sub_lang_list
1168         # An extractor error can be raise by the download process if there are
1169         # no automatic captions but there are subtitles
1170         except (KeyError, ExtractorError):
1171             self._downloader.report_warning(err_msg)
1172             return {}
1173
1174     def _print_formats(self, formats):
1175         print('Available formats:')
1176         for x in formats:
1177             print('%s\t:\t%s\t[%s]%s' %(x, self._video_extensions.get(x, 'flv'),
1178                                         self._video_dimensions.get(x, '???'),
1179                                         ' ('+self._special_itags[x]+')' if x in self._special_itags else ''))
1180
1181     def _extract_id(self, url):
1182         mobj = re.match(self._VALID_URL, url, re.VERBOSE)
1183         if mobj is None:
1184             raise ExtractorError(u'Invalid URL: %s' % url)
1185         video_id = mobj.group(2)
1186         return video_id
1187
1188     def _get_video_url_list(self, url_map):
1189         """
1190         Transform a dictionary in the format {itag:url} to a list of (itag, url)
1191         with the requested formats.
1192         """
1193         req_format = self._downloader.params.get('format', None)
1194         format_limit = self._downloader.params.get('format_limit', None)
1195         available_formats = self._available_formats_prefer_free if self._downloader.params.get('prefer_free_formats', False) else self._available_formats
1196         if format_limit is not None and format_limit in available_formats:
1197             format_list = available_formats[available_formats.index(format_limit):]
1198         else:
1199             format_list = available_formats
1200         existing_formats = [x for x in format_list if x in url_map]
1201         if len(existing_formats) == 0:
1202             raise ExtractorError(u'no known formats available for video')
1203         if self._downloader.params.get('listformats', None):
1204             self._print_formats(existing_formats)
1205             return
1206         if req_format is None or req_format == 'best':
1207             video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
1208         elif req_format == 'worst':
1209             video_url_list = [(existing_formats[-1], url_map[existing_formats[-1]])] # worst quality
1210         elif req_format in ('-1', 'all'):
1211             video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
1212         else:
1213             # Specific formats. We pick the first in a slash-delimeted sequence.
1214             # Format can be specified as itag or 'mp4' or 'flv' etc. We pick the highest quality
1215             # available in the specified format. For example,
1216             # if '1/2/3/4' is requested and '2' and '4' are available, we pick '2'.
1217             # if '1/mp4/3/4' is requested and '1' and '5' (is a mp4) are available, we pick '1'.
1218             # if '1/mp4/3/4' is requested and '4' and '5' (is a mp4) are available, we pick '5'.
1219             req_formats = req_format.split('/')
1220             video_url_list = None
1221             for rf in req_formats:
1222                 if rf in url_map:
1223                     video_url_list = [(rf, url_map[rf])]
1224                     break
1225                 if rf in self._video_formats_map:
1226                     for srf in self._video_formats_map[rf]:
1227                         if srf in url_map:
1228                             video_url_list = [(srf, url_map[srf])]
1229                             break
1230                     else:
1231                         continue
1232                     break
1233             if video_url_list is None:
1234                 raise ExtractorError(u'requested format not available')
1235         return video_url_list
1236
1237     def _extract_from_m3u8(self, manifest_url, video_id):
1238         url_map = {}
1239         def _get_urls(_manifest):
1240             lines = _manifest.split('\n')
1241             urls = filter(lambda l: l and not l.startswith('#'),
1242                             lines)
1243             return urls
1244         manifest = self._download_webpage(manifest_url, video_id, u'Downloading formats manifest')
1245         formats_urls = _get_urls(manifest)
1246         for format_url in formats_urls:
1247             itag = self._search_regex(r'itag/(\d+?)/', format_url, 'itag')
1248             url_map[itag] = format_url
1249         return url_map
1250
1251     def _real_extract(self, url):
1252         if re.match(r'(?:https?://)?[^/]+/watch\?feature=[a-z_]+$', url):
1253             self._downloader.report_warning(u'Did you forget to quote the URL? Remember that & is a meta-character in most shells, so you want to put the URL in quotes, like  youtube-dl \'http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc\' (or simply  youtube-dl BaW_jenozKc  ).')
1254
1255         # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1256         mobj = re.search(self._NEXT_URL_RE, url)
1257         if mobj:
1258             url = 'https://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/')
1259         video_id = self._extract_id(url)
1260
1261         # Get video webpage
1262         self.report_video_webpage_download(video_id)
1263         url = 'https://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
1264         request = compat_urllib_request.Request(url)
1265         try:
1266             video_webpage_bytes = compat_urllib_request.urlopen(request).read()
1267         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1268             raise ExtractorError(u'Unable to download video webpage: %s' % compat_str(err))
1269
1270         video_webpage = video_webpage_bytes.decode('utf-8', 'ignore')
1271
1272         # Attempt to extract SWF player URL
1273         mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
1274         if mobj is not None:
1275             player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1276         else:
1277             player_url = None
1278
1279         # Get video info
1280         self.report_video_info_webpage_download(video_id)
1281         if re.search(r'player-age-gate-content">', video_webpage) is not None:
1282             self.report_age_confirmation()
1283             age_gate = True
1284             # We simulate the access to the video from www.youtube.com/v/{video_id}
1285             # this can be viewed without login into Youtube
1286             data = compat_urllib_parse.urlencode({'video_id': video_id,
1287                                                   'el': 'embedded',
1288                                                   'gl': 'US',
1289                                                   'hl': 'en',
1290                                                   'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1291                                                   'asv': 3,
1292                                                   'sts':'1588',
1293                                                   })
1294             video_info_url = 'https://www.youtube.com/get_video_info?' + data
1295             video_info_webpage = self._download_webpage(video_info_url, video_id,
1296                                     note=False,
1297                                     errnote='unable to download video info webpage')
1298             video_info = compat_parse_qs(video_info_webpage)
1299         else:
1300             age_gate = False
1301             for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
1302                 video_info_url = ('https://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
1303                         % (video_id, el_type))
1304                 video_info_webpage = self._download_webpage(video_info_url, video_id,
1305                                         note=False,
1306                                         errnote='unable to download video info webpage')
1307                 video_info = compat_parse_qs(video_info_webpage)
1308                 if 'token' in video_info:
1309                     break
1310         if 'token' not in video_info:
1311             if 'reason' in video_info:
1312                 raise ExtractorError(u'YouTube said: %s' % video_info['reason'][0], expected=True)
1313             else:
1314                 raise ExtractorError(u'"token" parameter not in video info for unknown reason')
1315
1316         # Check for "rental" videos
1317         if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
1318             raise ExtractorError(u'"rental" videos not supported')
1319
1320         # Start extracting information
1321         self.report_information_extraction(video_id)
1322
1323         # uploader
1324         if 'author' not in video_info:
1325             raise ExtractorError(u'Unable to extract uploader name')
1326         video_uploader = compat_urllib_parse.unquote_plus(video_info['author'][0])
1327
1328         # uploader_id
1329         video_uploader_id = None
1330         mobj = re.search(r'<link itemprop="url" href="http://www.youtube.com/(?:user|channel)/([^"]+)">', video_webpage)
1331         if mobj is not None:
1332             video_uploader_id = mobj.group(1)
1333         else:
1334             self._downloader.report_warning(u'unable to extract uploader nickname')
1335
1336         # title
1337         if 'title' not in video_info:
1338             raise ExtractorError(u'Unable to extract video title')
1339         video_title = compat_urllib_parse.unquote_plus(video_info['title'][0])
1340
1341         # thumbnail image
1342         # We try first to get a high quality image:
1343         m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
1344                             video_webpage, re.DOTALL)
1345         if m_thumb is not None:
1346             video_thumbnail = m_thumb.group(1)
1347         elif 'thumbnail_url' not in video_info:
1348             self._downloader.report_warning(u'unable to extract video thumbnail')
1349             video_thumbnail = None
1350         else:   # don't panic if we can't find it
1351             video_thumbnail = compat_urllib_parse.unquote_plus(video_info['thumbnail_url'][0])
1352
1353         # upload date
1354         upload_date = None
1355         mobj = re.search(r'id="eow-date.*?>(.*?)</span>', video_webpage, re.DOTALL)
1356         if mobj is not None:
1357             upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
1358             upload_date = unified_strdate(upload_date)
1359
1360         # description
1361         video_description = get_element_by_id("eow-description", video_webpage)
1362         if video_description:
1363             video_description = clean_html(video_description)
1364         else:
1365             fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
1366             if fd_mobj:
1367                 video_description = unescapeHTML(fd_mobj.group(1))
1368             else:
1369                 video_description = u''
1370
1371         # subtitles
1372         video_subtitles = self.extract_subtitles(video_id, video_webpage)
1373
1374         if self._downloader.params.get('listsubtitles', False):
1375             self._list_available_subtitles(video_id, video_webpage)
1376             return
1377
1378         if 'length_seconds' not in video_info:
1379             self._downloader.report_warning(u'unable to extract video duration')
1380             video_duration = ''
1381         else:
1382             video_duration = compat_urllib_parse.unquote_plus(video_info['length_seconds'][0])
1383
1384         # Decide which formats to download
1385
1386         try:
1387             mobj = re.search(r';ytplayer.config = ({.*?});', video_webpage)
1388             if not mobj:
1389                 raise ValueError('Could not find vevo ID')
1390             info = json.loads(mobj.group(1))
1391             args = info['args']
1392             # Easy way to know if the 's' value is in url_encoded_fmt_stream_map
1393             # this signatures are encrypted
1394             if 'url_encoded_fmt_stream_map' not in args:
1395                 raise ValueError(u'No stream_map present')  # caught below
1396             m_s = re.search(r'[&,]s=', args['url_encoded_fmt_stream_map'])
1397             if m_s is not None:
1398                 self.to_screen(u'%s: Encrypted signatures detected.' % video_id)
1399                 video_info['url_encoded_fmt_stream_map'] = [args['url_encoded_fmt_stream_map']]
1400             m_s = re.search(r'[&,]s=', args.get('adaptive_fmts', u''))
1401             if m_s is not None:
1402                 if 'url_encoded_fmt_stream_map' in video_info:
1403                     video_info['url_encoded_fmt_stream_map'][0] += ',' + args['adaptive_fmts']
1404                 else:
1405                     video_info['url_encoded_fmt_stream_map'] = [args['adaptive_fmts']]
1406             elif 'adaptive_fmts' in video_info:
1407                 if 'url_encoded_fmt_stream_map' in video_info:
1408                     video_info['url_encoded_fmt_stream_map'][0] += ',' + video_info['adaptive_fmts'][0]
1409                 else:
1410                     video_info['url_encoded_fmt_stream_map'] = video_info['adaptive_fmts']
1411         except ValueError:
1412             pass
1413
1414         if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1415             self.report_rtmp_download()
1416             video_url_list = [(None, video_info['conn'][0])]
1417         elif 'url_encoded_fmt_stream_map' in video_info and len(video_info['url_encoded_fmt_stream_map']) >= 1:
1418             if 'rtmpe%3Dyes' in video_info['url_encoded_fmt_stream_map'][0]:
1419                 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
1420             url_map = {}
1421             for url_data_str in video_info['url_encoded_fmt_stream_map'][0].split(','):
1422                 url_data = compat_parse_qs(url_data_str)
1423                 if 'itag' in url_data and 'url' in url_data:
1424                     url = url_data['url'][0]
1425                     if 'sig' in url_data:
1426                         url += '&signature=' + url_data['sig'][0]
1427                     elif 's' in url_data:
1428                         encrypted_sig = url_data['s'][0]
1429                         if self._downloader.params.get('verbose'):
1430                             if age_gate:
1431                                 if player_url is None:
1432                                     player_version = 'unknown'
1433                                 else:
1434                                     player_version = self._search_regex(
1435                                         r'-(.+)\.swf$', player_url,
1436                                         u'flash player', fatal=False)
1437                                 player_desc = 'flash player %s' % player_version
1438                             else:
1439                                 player_version = self._search_regex(
1440                                     r'html5player-(.+?)\.js', video_webpage,
1441                                     'html5 player', fatal=False)
1442                                 player_desc = u'html5 player %s' % player_version
1443
1444                             parts_sizes = u'.'.join(compat_str(len(part)) for part in encrypted_sig.split('.'))
1445                             self.to_screen(u'encrypted signature length %d (%s), itag %s, %s' %
1446                                 (len(encrypted_sig), parts_sizes, url_data['itag'][0], player_desc))
1447
1448                         if not age_gate:
1449                             jsplayer_url_json = self._search_regex(
1450                                 r'"assets":.+?"js":\s*("[^"]+")',
1451                                 video_webpage, u'JS player URL')
1452                             player_url = json.loads(jsplayer_url_json)
1453
1454                         signature = self._decrypt_signature(
1455                             encrypted_sig, video_id, player_url, age_gate)
1456                         url += '&signature=' + signature
1457                     if 'ratebypass' not in url:
1458                         url += '&ratebypass=yes'
1459                     url_map[url_data['itag'][0]] = url
1460             video_url_list = self._get_video_url_list(url_map)
1461             if not video_url_list:
1462                 return
1463         elif video_info.get('hlsvp'):
1464             manifest_url = video_info['hlsvp'][0]
1465             url_map = self._extract_from_m3u8(manifest_url, video_id)
1466             video_url_list = self._get_video_url_list(url_map)
1467             if not video_url_list:
1468                 return
1469
1470         else:
1471             raise ExtractorError(u'no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
1472
1473         results = []
1474         for format_param, video_real_url in video_url_list:
1475             # Extension
1476             video_extension = self._video_extensions.get(format_param, 'flv')
1477
1478             video_format = '{0} - {1}{2}'.format(format_param if format_param else video_extension,
1479                                               self._video_dimensions.get(format_param, '???'),
1480                                               ' ('+self._special_itags[format_param]+')' if format_param in self._special_itags else '')
1481
1482             results.append({
1483                 'id':       video_id,
1484                 'url':      video_real_url,
1485                 'uploader': video_uploader,
1486                 'uploader_id': video_uploader_id,
1487                 'upload_date':  upload_date,
1488                 'title':    video_title,
1489                 'ext':      video_extension,
1490                 'format':   video_format,
1491                 'thumbnail':    video_thumbnail,
1492                 'description':  video_description,
1493                 'player_url':   player_url,
1494                 'subtitles':    video_subtitles,
1495                 'duration':     video_duration
1496             })
1497         return results
1498
class YoutubePlaylistIE(InfoExtractor):
    """Extractor for YouTube playlists, listed through the GData JSON feed.

    Besides plain playlist URLs and bare playlist ids, the pattern also
    matches watch URLs that carry a playlist parameter; for those the
    'noplaylist' downloader option selects the single video instead.
    """
    IE_DESC = u'YouTube.com playlists'
    _VALID_URL = r"""(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        youtube\.com/
                        (?:
                           (?:course|view_play_list|my_playlists|artist|playlist|watch)
                           \? (?:.*?&)*? (?:p|a|list)=
                        |  p/
                        )
                        ((?:PL|EC|UU|FL)?[0-9A-Za-z-_]{10,})
                        .*
                     |
                        ((?:PL|EC|UU|FL)[0-9A-Za-z-_]{10,})
                     )"""
    _TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/playlists/%s?max-results=%i&start-index=%i&v=2&alt=json&safeSearch=none'
    _MAX_RESULTS = 50
    IE_NAME = u'youtube:playlist'

    @classmethod
    def suitable(cls, url):
        """Receives a URL and returns True if suitable for this IE."""
        # _VALID_URL is written in verbose syntax, so it must be matched
        # with re.VERBOSE.
        return re.match(cls._VALID_URL, url, re.VERBOSE) is not None

    def _real_extract(self, url):
        """Return the playlist behind `url` as a one-element list.

        When `url` also names a video ('v' query parameter) and the
        'noplaylist' downloader option is set, a url_result for just that
        video is returned instead.

        Raises ExtractorError if the URL does not match or the API response
        is not the expected JSON feed.
        """
        # Extract playlist id
        mobj = re.match(self._VALID_URL, url, re.VERBOSE)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        playlist_id = mobj.group(1) or mobj.group(2)

        # Check if it's a video-specific URL
        query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
        if 'v' in query_dict:
            video_id = query_dict['v'][0]
            if self._downloader.params.get('noplaylist'):
                self.to_screen(u'Downloading just video %s because of --no-playlist' % video_id)
                return self.url_result('https://www.youtube.com/watch?v=' + video_id, 'Youtube')
            # The captured id keeps whatever PL/EC/UU/FL prefix the URL had,
            # so it is printed as-is instead of prepending another "PL".
            self.to_screen(u'Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))

        # Download playlist videos from API
        videos = []

        for page_num in itertools.count(1):
            start_index = self._MAX_RESULTS * (page_num - 1) + 1
            if start_index >= 1000:
                self._downloader.report_warning(u'Max number of results reached')
                break
            # Use a separate name so the `url` argument is not overwritten
            page_url = self._TEMPLATE_URL % (playlist_id, self._MAX_RESULTS, start_index)
            page = self._download_webpage(page_url, playlist_id, u'Downloading page #%s' % page_num)

            try:
                response = json.loads(page)
            except ValueError as err:
                raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))

            if 'feed' not in response:
                raise ExtractorError(u'Got a malformed response from YouTube API')
            playlist_title = response['feed']['title']['$t']
            if 'entry' not in response['feed']:
                # Number of videos is a multiple of self._MAX_RESULTS
                break

            for entry in response['feed']['entry']:
                index = entry['yt$position']['$t']
                # Entries without a video id are left out
                if 'media$group' in entry and 'yt$videoid' in entry['media$group']:
                    videos.append((
                        index,
                        'https://www.youtube.com/watch?v=' + entry['media$group']['yt$videoid']['$t']
                    ))

        # Order the videos by their reported playlist position
        videos = [v[1] for v in sorted(videos)]

        url_results = [self.url_result(vurl, 'Youtube') for vurl in videos]
        return [self.playlist_result(url_results, playlist_id, playlist_title)]
1576
1577
class YoutubeChannelIE(InfoExtractor):
    """Extractor that lists the videos of a YouTube channel page."""
    IE_NAME = u'youtube:channel'
    IE_DESC = u'YouTube.com channels'
    _VALID_URL = r"^(?:https?://)?(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/([0-9A-Za-z_-]+)"
    _TEMPLATE_URL = 'http://www.youtube.com/channel/%s/videos?sort=da&flow=list&view=0&page=%s&gl=US&hl=en'
    _MORE_PAGES_URL = 'http://www.youtube.com/c4_browse_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s'
    _MORE_PAGES_INDICATOR = 'yt-uix-load-more'

    def extract_videos_from_page(self, page):
        """Return the video ids linked from `page`, first occurrence order, no repeats."""
        unique_ids = []
        for video_id in re.findall(r'href="/watch\?v=([0-9A-Za-z_-]+)&?', page):
            if video_id in unique_ids:
                continue
            unique_ids.append(video_id)
        return unique_ids

    def _real_extract(self, url):
        """Return a one-element list holding the channel's videos as a playlist."""
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        channel_id = mobj.group(1)

        # The first page is the regular HTML listing of the channel
        first_pagenum = 1
        page = self._download_webpage(
            self._TEMPLATE_URL % (channel_id, first_pagenum), channel_id,
            u'Downloading page #%s' % first_pagenum)
        video_ids = list(self.extract_videos_from_page(page))

        # Further pages come from the json-based "load more" ajax endpoint,
        # which is queried until it stops advertising another page
        if self._MORE_PAGES_INDICATOR in page:
            for pagenum in itertools.count(1):
                ajax_url = self._MORE_PAGES_URL % (pagenum, channel_id)
                raw_reply = self._download_webpage(
                    ajax_url, channel_id,
                    u'Downloading page #%s' % pagenum)
                reply = json.loads(raw_reply)

                video_ids.extend(self.extract_videos_from_page(reply['content_html']))

                if self._MORE_PAGES_INDICATOR not in reply['load_more_widget_html']:
                    break

        self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))

        url_entries = [
            self.url_result('http://www.youtube.com/watch?v=%s' % video_id, 'Youtube')
            for video_id in video_ids
        ]
        return [self.playlist_result(url_entries, channel_id)]
1632
1633
class YoutubeUserIE(InfoExtractor):
    """Extractor that lists a user's uploads through the GData JSON feed."""
    IE_NAME = u'youtube:user'
    IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?)|ytuser:)(?!feed/)([A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
    _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'
    _GDATA_PAGE_SIZE = 50

    @classmethod
    def suitable(cls, url):
        """Return True only if no other *IE class of this module accepts `url`."""
        # The regex here is very permissive and would also match URLs that
        # belong to the other youtube extractors, so those get asked first.
        for name, klass in globals().items():
            if not name.endswith('IE') or klass is cls:
                continue
            if klass.suitable(url):
                return False
        return super(YoutubeUserIE, cls).suitable(url)

    def _real_extract(self, url):
        """Return a one-element list holding the user's uploads as a playlist."""
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        username = mobj.group(1)

        # The API hands out at most _GDATA_PAGE_SIZE ids per query, so the
        # uploads feed is walked page by page until it runs dry.
        page_size = self._GDATA_PAGE_SIZE
        video_ids = []

        for pagenum in itertools.count(0):
            start_index = pagenum * page_size + 1

            gdata_url = self._GDATA_URL % (username, page_size, start_index)
            page = self._download_webpage(
                gdata_url, username,
                u'Downloading video ids from %d to %d' % (start_index, start_index + page_size))

            try:
                response = json.loads(page)
            except ValueError as err:
                raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))
            feed = response['feed']
            if 'entry' not in feed:
                # The number of videos is a multiple of the page size
                break

            # The video id is the last path component of each entry's id
            ids_in_page = [entry['id']['$t'].split('/')[-1] for entry in feed['entry']]
            video_ids.extend(ids_in_page)

            # A page that is not full is the last one, so the extra query
            # that would come back empty can be skipped.
            if len(ids_in_page) < page_size:
                break

        url_results = [
            self.url_result('http://www.youtube.com/watch?v=%s' % video_id, 'Youtube')
            for video_id in video_ids
        ]
        return [self.playlist_result(url_results, playlist_title=username)]
1698
class YoutubeSearchIE(SearchInfoExtractor):
    """Search extractor backed by the GData jsonc video search feed."""
    IE_NAME = u'youtube:search'
    IE_DESC = u'YouTube.com searches'
    _SEARCH_KEY = 'ytsearch'
    _MAX_RESULTS = 1000
    _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc'

    def report_download_page(self, query, pagenum):
        """Report attempt to download search page with given number."""
        self._downloader.to_screen(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        collected_ids = []
        limit = n
        pagenum = 0

        # Each API page holds up to 50 results
        while 50 * pagenum < limit:
            self.report_download_page(query, pagenum + 1)
            result_url = self._API_URL % (compat_urllib_parse.quote_plus(query), 50 * pagenum + 1)
            request = compat_urllib_request.Request(result_url)
            try:
                data = compat_urllib_request.urlopen(request).read().decode('utf-8')
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                raise ExtractorError(u'Unable to download API page: %s' % compat_str(err))
            api_response = json.loads(data)['data']

            if 'items' not in api_response:
                raise ExtractorError(u'[youtube] No video results')

            collected_ids.extend([video['id'] for video in api_response['items']])

            # Never ask for more than the API says exists
            limit = min(n, api_response['totalItems'])
            pagenum += 1

        # The last page may have pushed the total past what was requested
        if len(collected_ids) > n:
            collected_ids = collected_ids[:n]
        videos = [
            self.url_result('http://www.youtube.com/watch?v=%s' % video_id, 'Youtube')
            for video_id in collected_ids
        ]
        return self.playlist_result(videos, query)
1740
1741
class YoutubeShowIE(InfoExtractor):
    """Extractor that turns a show page into one playlist URL per season."""
    IE_NAME = u'youtube:show'
    IE_DESC = u'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://www\.youtube\.com/show/(.*)'

    def _real_extract(self, url):
        """Return a url_result for every playlist link found on the show page."""
        show_name = re.match(self._VALID_URL, url).group(1)
        webpage = self._download_webpage(url, show_name, u'Downloading show webpage')
        # Each season of the show is published as its own playlist
        season_paths = re.findall(r'href="(/playlist\?list=.*?)"', webpage)
        self.to_screen(u'%s: Found %s seasons' % (show_name, len(season_paths)))
        return [
            self.url_result('https://www.youtube.com' + season_path, 'YoutubePlaylist')
            for season_path in season_paths
        ]
1755
1756
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Common base for the extractors that read their videos from
    http://www.youtube.com/feed_ajax
    Subclasses have to provide the _FEED_NAME and _PLAYLIST_TITLE properties.
    """
    _LOGIN_REQUIRED = True
    # Amount by which the 'paging' parameter grows from one request to the next
    _PAGING_STEP = 30
    # When True, action_load_personal_feed is requested instead of
    # action_load_system_feed
    _PERSONAL_FEED = False

    @property
    def _FEED_TEMPLATE(self):
        """URL template of the feed; its single placeholder is the paging offset."""
        action = 'action_load_personal_feed' if self._PERSONAL_FEED else 'action_load_system_feed'
        return 'http://www.youtube.com/feed_ajax?%s=1&feed_name=%s&paging=%%s' % (action, self._FEED_NAME)

    @property
    def IE_NAME(self):
        """Extractor name, derived from the feed name."""
        return u'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        self._login()

    def _real_extract(self, url):
        """Return a playlist with every video found while paging through the feed."""
        feed_entries = []
        # itertools.count() takes a step argument only from 2.7 on, so the
        # page counter is scaled by hand
        for i in itertools.count(0):
            paging = i * self._PAGING_STEP
            raw_info = self._download_webpage(
                self._FEED_TEMPLATE % paging,
                u'%s feed' % self._FEED_NAME,
                u'Downloading page %s' % i)
            info = json.loads(raw_info)
            id_matches = re.finditer(r'"/watch\?v=(.*?)["&]', info['feed_html'])
            for video_id in orderedSet(m.group(1) for m in id_matches):
                feed_entries.append(self.url_result(video_id, 'Youtube'))
            # A null 'paging' value ends the loop
            if info['paging'] is None:
                break
        return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE)
1798
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor configured for the 'subscriptions' feed."""
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = u'Youtube Subscriptions'
    _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
    IE_DESC = u'YouTube.com subscriptions feed, "ytsubs" keyword(requires authentication)'
1804
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor configured for the 'recommended' feed."""
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = u'Youtube Recommended videos'
    _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?'
    IE_DESC = u'YouTube.com recommended videos, "ytrec" keyword (requires authentication)'
1810
class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor):
    """Feed extractor configured for the personal 'watch_later' feed."""
    _FEED_NAME = 'watch_later'
    _PLAYLIST_TITLE = u'Youtube Watch Later'
    # Overrides of the base class defaults: personal feed, paged in steps of 100
    _PERSONAL_FEED = True
    _PAGING_STEP = 100
    _VALID_URL = r'https?://www\.youtube\.com/feed/watch_later|:ytwatchlater'
    IE_DESC = u'Youtube watch later list, "ytwatchlater" keyword (requires authentication)'
1818
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Extractor that forwards the my_favorites page to the playlist extractor."""
    _LOGIN_REQUIRED = True
    _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
    IE_DESC = u'YouTube.com favourite videos, "ytfav" keyword (requires authentication)'
    IE_NAME = u'youtube:favorites'

    def _real_extract(self, url):
        """Return a url_result for the playlist id found on the favourites page."""
        favourites_page = self._download_webpage(
            'https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        # The page links to the favourites playlist through a list= parameter
        list_id = self._search_regex(
            r'list=(.+?)["&]', favourites_page, u'favourites playlist id')
        return self.url_result(list_id, 'YoutubePlaylist')