Do not warn if fallback is without alternatives (because we did not get the flash...
[youtube-dl] / youtube_dl / extractor / youtube.py
1 # coding: utf-8
2
3 import collections
4 import errno
5 import io
6 import itertools
7 import json
8 import os.path
9 import re
10 import socket
11 import string
12 import struct
13 import traceback
14 import xml.etree.ElementTree
15 import zlib
16
17 from .common import InfoExtractor, SearchInfoExtractor
18 from .subtitles import SubtitlesInfoExtractor
19 from ..utils import (
20     compat_chr,
21     compat_http_client,
22     compat_parse_qs,
23     compat_urllib_error,
24     compat_urllib_parse,
25     compat_urllib_request,
26     compat_str,
27
28     clean_html,
29     get_element_by_id,
30     ExtractorError,
31     unescapeHTML,
32     unified_strdate,
33     orderedSet,
34     write_json_file,
35 )
36
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _LANG_URL = r'https://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
    _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    def report_lang(self):
        """Report attempt to set language."""
        self.to_screen(u'Setting language')

    def _set_language(self):
        """Request the English-language page so later scraping sees stable text.

        Returns True on success, False after emitting a warning.
        """
        request = compat_urllib_request.Request(self._LANG_URL)
        try:
            self.report_lang()
            compat_urllib_request.urlopen(request).read()
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            # Non-fatal: extraction may still work without the language cookie.
            self._downloader.report_warning(u'unable to set language: %s' % compat_str(err))
            return False
        return True

    def _login(self):
        """Log in to YouTube with the user-supplied credentials.

        Returns True on success and False otherwise (after emitting a
        warning).  Raises ExtractorError when _LOGIN_REQUIRED is set but no
        credentials are available.
        """
        (username, password) = self._get_login_info()
        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED:
                raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            return False

        request = compat_urllib_request.Request(self._LOGIN_URL)
        try:
            login_page = compat_urllib_request.urlopen(request).read().decode('utf-8')
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self._downloader.report_warning(u'unable to fetch login page: %s' % compat_str(err))
            return False

        # The login form carries hidden anti-forgery tokens (GALX, dsh) that
        # must be echoed back in the POST below.
        galx = None
        dsh = None
        match = re.search(r'<input.+?name="GALX".+?value="(.+?)"', login_page, re.DOTALL)
        if match:
            galx = match.group(1)
        match = re.search(r'<input.+?name="dsh".+?value="(.+?)"', login_page, re.DOTALL)
        if match:
            dsh = match.group(1)

        # Log in
        login_form_strs = {
                u'continue': u'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
                u'Email': username,
                u'GALX': galx,
                u'Passwd': password,
                u'PersistentCookie': u'yes',
                u'_utf8': u'霱',
                u'bgresponse': u'js_disabled',
                u'checkConnection': u'',
                u'checkedDomains': u'youtube',
                u'dnConn': u'',
                u'dsh': dsh,
                u'pstMsg': u'0',
                u'rmShown': u'1',
                u'secTok': u'',
                u'signIn': u'Sign in',
                u'timeStmp': u'',
                u'service': u'youtube',
                u'uilel': u'3',
                u'hl': u'en_US',
        }
        # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
        # chokes on unicode
        login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k, v in login_form_strs.items())
        login_data = compat_urllib_parse.urlencode(login_form).encode('ascii')
        request = compat_urllib_request.Request(self._LOGIN_URL, login_data)
        try:
            self.report_login()
            login_results = compat_urllib_request.urlopen(request).read().decode('utf-8')
            # The login form being served again means the credentials were rejected.
            if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
                self._downloader.report_warning(u'unable to log in: bad username or password')
                return False
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self._downloader.report_warning(u'unable to log in: %s' % compat_str(err))
            return False
        return True

    def _confirm_age(self):
        """Confirm age on the verify_age gate; return True or raise ExtractorError."""
        age_form = {
                'next_url':     '/',
                'action_confirm':   'Confirm',
                }
        # Encode the POST body to bytes, as _login does (required on Python 3).
        request = compat_urllib_request.Request(
            self._AGE_URL, compat_urllib_parse.urlencode(age_form).encode('ascii'))
        try:
            self.report_age_confirmation()
            compat_urllib_request.urlopen(request).read().decode('utf-8')
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            raise ExtractorError(u'Unable to confirm age: %s' % compat_str(err))
        return True

    def _real_initialize(self):
        # Language/login failures abort initialization quietly; age
        # confirmation is the only step that may raise.
        if self._downloader is None:
            return
        if not self._set_language():
            return
        if not self._login():
            return
        self._confirm_age()
144
class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
    """Information extractor for individual youtube.com videos."""
    IE_DESC = u'YouTube.com'
    # Verbose regex: matches full watch/embed/youtu.be URLs as well as a
    # naked 11-character video ID; group 2 captures the video ID.
    _VALID_URL = r"""^
                     (
                         (?:https?://)?                                       # http(s):// (optional)
                         (?:(?:(?:(?:\w+\.)?youtube(?:-nocookie)?\.com/|
                            tube\.majestyc\.net/|
                            youtube\.googleapis\.com/)                        # the various hostnames, with wildcard subdomains
                         (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
                         (?:                                                  # the various things that can precede the ID:
                             (?:(?:v|embed|e)/)                               # v/ or embed/ or e/
                             |(?:                                             # or the v= param in all its forms
                                 (?:(?:watch|movie)(?:_popup)?(?:\.php)?)?    # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
                                 (?:\?|\#!?)                                  # the params delimiter ? or # or #!
                                 (?:.*?&)?                                    # any other preceding param (like /?s=tuff&v=xxxx)
                                 v=
                             )
                         ))
                         |youtu\.be/                                          # just youtu.be/xxxx
                         )
                     )?                                                       # all until now is optional -> you can pass the naked ID
                     ([0-9A-Za-z_-]{11})                                      # here is it! the YouTube video ID
                     (?(1).+)?                                                # if we found the ID, everything can follow
                     $"""
    # Extracts the next_url query parameter from verify_age redirect URLs.
    _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
    # Listed in order of quality
    _available_formats = ['38', '37', '46', '22', '45', '35', '44', '34', '18', '43', '6', '5', '36', '17', '13',
                          # Apple HTTP Live Streaming
                          '96', '95', '94', '93', '92', '132', '151',
                          # 3D
                          '85', '84', '102', '83', '101', '82', '100',
                          # Dash video
                          '138', '137', '248', '136', '247', '135', '246',
                          '245', '244', '134', '243', '133', '242', '160',
                          # Dash audio
                          '141', '172', '140', '171', '139',
                          ]
    # Same itags, but re-ordered to rank free (webm) formats above mp4/flv.
    _available_formats_prefer_free = ['38', '46', '37', '45', '22', '44', '35', '43', '34', '18', '6', '5', '36', '17', '13',
                                      # Apple HTTP Live Streaming
                                      '96', '95', '94', '93', '92', '132', '151',
                                      # 3D
                                      '85', '102', '84', '101', '83', '100', '82',
                                      # Dash video
                                      '138', '248', '137', '247', '136', '246', '245',
                                      '244', '135', '243', '134', '242', '133', '160',
                                      # Dash audio
                                      '172', '141', '171', '140', '139',
                                      ]
    # Container extension -> itags available in that container (best first).
    _video_formats_map = {
        'flv': ['35', '34', '6', '5'],
        '3gp': ['36', '17', '13'],
        'mp4': ['38', '37', '22', '18'],
        'webm': ['46', '45', '44', '43'],
    }
    # itag -> container extension.
    _video_extensions = {
        '13': '3gp',
        '17': '3gp',
        '18': 'mp4',
        '22': 'mp4',
        '36': '3gp',
        '37': 'mp4',
        '38': 'mp4',
        '43': 'webm',
        '44': 'webm',
        '45': 'webm',
        '46': 'webm',

        # 3d videos
        '82': 'mp4',
        '83': 'mp4',
        '84': 'mp4',
        '85': 'mp4',
        '100': 'webm',
        '101': 'webm',
        '102': 'webm',

        # Apple HTTP Live Streaming
        '92': 'mp4',
        '93': 'mp4',
        '94': 'mp4',
        '95': 'mp4',
        '96': 'mp4',
        '132': 'mp4',
        '151': 'mp4',

        # Dash mp4
        '133': 'mp4',
        '134': 'mp4',
        '135': 'mp4',
        '136': 'mp4',
        '137': 'mp4',
        '138': 'mp4',
        '139': 'mp4',
        '140': 'mp4',
        '141': 'mp4',
        '160': 'mp4',

        # Dash webm
        '171': 'webm',
        '172': 'webm',
        '242': 'webm',
        '243': 'webm',
        '244': 'webm',
        '245': 'webm',
        '246': 'webm',
        '247': 'webm',
        '248': 'webm',
    }
    # itag -> display string: "HxW" pixel dimensions, a "NNNp" height label,
    # or an audio bitrate ("128k") for audio-only DASH itags.
    _video_dimensions = {
        '5': '240x400',
        '6': '???',
        '13': '???',
        '17': '144x176',
        '18': '360x640',
        '22': '720x1280',
        '34': '360x640',
        '35': '480x854',
        '36': '240x320',
        '37': '1080x1920',
        '38': '3072x4096',
        '43': '360x640',
        '44': '480x854',
        '45': '720x1280',
        '46': '1080x1920',
        '82': '360p',
        '83': '480p',
        '84': '720p',
        '85': '1080p',
        '92': '240p',
        '93': '360p',
        '94': '480p',
        '95': '720p',
        '96': '1080p',
        '100': '360p',
        '101': '480p',
        '102': '720p',
        '132': '240p',
        '151': '72p',
        '133': '240p',
        '134': '360p',
        '135': '480p',
        '136': '720p',
        '137': '1080p',
        '138': '>1080p',
        '139': '48k',
        '140': '128k',
        '141': '256k',
        '160': '192p',
        '171': '128k',
        '172': '256k',
        '242': '240p',
        '243': '360p',
        '244': '480p',
        '245': '480p',
        '246': '480p',
        '247': '720p',
        '248': '1080p',
    }
    # itag -> extra label shown next to the format (3D / DASH stream type).
    _special_itags = {
        '82': '3D',
        '83': '3D',
        '84': '3D',
        '85': '3D',
        '100': '3D',
        '101': '3D',
        '102': '3D',
        '133': 'DASH Video',
        '134': 'DASH Video',
        '135': 'DASH Video',
        '136': 'DASH Video',
        '137': 'DASH Video',
        '138': 'DASH Video',
        '139': 'DASH Audio',
        '140': 'DASH Audio',
        '141': 'DASH Audio',
        '160': 'DASH Video',
        '171': 'DASH Audio',
        '172': 'DASH Audio',
        '242': 'DASH Video',
        '243': 'DASH Video',
        '244': 'DASH Video',
        '245': 'DASH Video',
        '246': 'DASH Video',
        '247': 'DASH Video',
        '248': 'DASH Video',
    }

    IE_NAME = u'youtube'
    # Self-tests consumed by the test harness; info_dict values are the
    # expected extraction results for each URL.
    _TESTS = [
        {
            u"url":  u"http://www.youtube.com/watch?v=BaW_jenozKc",
            u"file":  u"BaW_jenozKc.mp4",
            u"info_dict": {
                u"title": u"youtube-dl test video \"'/\\ä↭𝕐",
                u"uploader": u"Philipp Hagemeister",
                u"uploader_id": u"phihag",
                u"upload_date": u"20121002",
                u"description": u"test chars:  \"'/\\ä↭𝕐\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de ."
            }
        },
        {
            u"url":  u"http://www.youtube.com/watch?v=1ltcDfZMA3U",
            u"file":  u"1ltcDfZMA3U.flv",
            u"note": u"Test VEVO video (#897)",
            u"info_dict": {
                u"upload_date": u"20070518",
                u"title": u"Maps - It Will Find You",
                u"description": u"Music video by Maps performing It Will Find You.",
                u"uploader": u"MuteUSA",
                u"uploader_id": u"MuteUSA"
            }
        },
        {
            u"url":  u"http://www.youtube.com/watch?v=UxxajLWwzqY",
            u"file":  u"UxxajLWwzqY.mp4",
            u"note": u"Test generic use_cipher_signature video (#897)",
            u"info_dict": {
                u"upload_date": u"20120506",
                u"title": u"Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]",
                u"description": u"md5:3e2666e0a55044490499ea45fe9037b7",
                u"uploader": u"Icona Pop",
                u"uploader_id": u"IconaPop"
            }
        },
        {
            u"url":  u"https://www.youtube.com/watch?v=07FYdnEawAQ",
            u"file":  u"07FYdnEawAQ.mp4",
            u"note": u"Test VEVO video with age protection (#956)",
            u"info_dict": {
                u"upload_date": u"20130703",
                u"title": u"Justin Timberlake - Tunnel Vision (Explicit)",
                u"description": u"md5:64249768eec3bc4276236606ea996373",
                u"uploader": u"justintimberlakeVEVO",
                u"uploader_id": u"justintimberlakeVEVO"
            }
        },
        {
            u'url': u'https://www.youtube.com/watch?v=TGi3HqYrWHE',
            u'file': u'TGi3HqYrWHE.mp4',
            u'note': u'm3u8 video',
            u'info_dict': {
                u'title': u'Triathlon - Men - London 2012 Olympic Games',
                u'description': u'- Men -  TR02 - Triathlon - 07 August 2012 - London 2012 Olympic Games',
                u'uploader': u'olympic',
                u'upload_date': u'20120807',
                u'uploader_id': u'olympic',
            },
            u'params': {
                u'skip_download': True,
            },
        },
    ]
397
398
399     @classmethod
400     def suitable(cls, url):
401         """Receives a URL and returns True if suitable for this IE."""
402         if YoutubePlaylistIE.suitable(url): return False
403         return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
404
    def __init__(self, *args, **kwargs):
        """Initialize the extractor and its per-player signature cache."""
        super(YoutubeIE, self).__init__(*args, **kwargs)
        # In-memory cache of extracted signature functions, so the player
        # code is only downloaded and parsed once per process.
        self._player_cache = {}
408
    def report_video_webpage_download(self, video_id):
        """Report attempt to download video webpage (status output only)."""
        self.to_screen(u'%s: Downloading video webpage' % video_id)
412
    def report_video_info_webpage_download(self, video_id):
        """Report attempt to download video info webpage (status output only)."""
        self.to_screen(u'%s: Downloading video info webpage' % video_id)
416
    def report_information_extraction(self, video_id):
        """Report attempt to extract video information (status output only)."""
        self.to_screen(u'%s: Extracting video information' % video_id)
420
    def report_unavailable_format(self, video_id, format):
        """Report that the requested format is not available for this video."""
        self.to_screen(u'%s: Format %s not available' % (video_id, format))
424
    def report_rtmp_download(self):
        """Indicate the download will use the RTMP protocol."""
        self.to_screen(u'RTMP download detected')
428
    def _extract_signature_function(self, video_id, player_url, slen):
        """Obtain the signature-decryption function for a given player.

        *player_url* points at the player code (JS or SWF); *slen* is the
        length of the scrambled signature.  Returns a callable mapping the
        scrambled signature string to the decrypted one.  Results are cached
        on disk (unless cachedir is u'NONE'), keyed by player type, player id
        and signature length.
        """
        # Player URLs end in ...-<id>.<ext>; ext selects the parser (js/swf).
        id_m = re.match(r'.*-(?P<id>[a-zA-Z0-9_-]+)\.(?P<ext>[a-z]+)$',
                        player_url)
        player_type = id_m.group('ext')
        player_id = id_m.group('id')

        # Read from filesystem cache
        func_id = '%s_%s_%d' % (player_type, player_id, slen)
        # Paranoia: the cache key must not contain path separators.
        assert os.path.basename(func_id) == func_id
        cache_dir = self._downloader.params.get('cachedir',
                                                u'~/.youtube-dl/cache')

        cache_enabled = cache_dir != u'NONE'
        if cache_enabled:
            cache_fn = os.path.join(os.path.expanduser(cache_dir),
                                    u'youtube-sigfuncs',
                                    func_id + '.json')
            try:
                with io.open(cache_fn, 'r', encoding='utf-8') as cachef:
                    cache_spec = json.load(cachef)
                # Cache stores the output index permutation directly.
                return lambda s: u''.join(s[i] for i in cache_spec)
            except IOError:
                pass  # No cache available

        if player_type == 'js':
            code = self._download_webpage(
                player_url, video_id,
                note=u'Downloading %s player %s' % (player_type, player_id),
                errnote=u'Download of %s failed' % player_url)
            res = self._parse_sig_js(code)
        elif player_type == 'swf':
            urlh = self._request_webpage(
                player_url, video_id,
                note=u'Downloading %s player %s' % (player_type, player_id),
                errnote=u'Download of %s failed' % player_url)
            code = urlh.read()
            res = self._parse_sig_swf(code)
        else:
            assert False, 'Invalid player type %r' % player_type

        if cache_enabled:
            try:
                # Derive the index permutation by running the function on the
                # identity string chr(0)..chr(slen-1), then persist it as JSON.
                # Cache write failures are reported but never fatal.
                cache_res = res(map(compat_chr, range(slen)))
                cache_spec = [ord(c) for c in cache_res]
                try:
                    os.makedirs(os.path.dirname(cache_fn))
                except OSError as ose:
                    if ose.errno != errno.EEXIST:
                        raise
                write_json_file(cache_spec, cache_fn)
            except Exception:
                tb = traceback.format_exc()
                self._downloader.report_warning(
                    u'Writing cache to %r failed: %s' % (cache_fn, tb))

        return res
485
    def _print_sig_code(self, func, slen):
        """Print Python source equivalent to the extracted signature function.

        Runs *func* over the identity string of length *slen*, then compresses
        the resulting index permutation into slice expressions so the
        scrambling can be reproduced as static code (debugging aid).
        """
        def gen_sig_code(idxs):
            def _genslice(start, end, step):
                # Render s[start:end+step:step], omitting defaulted parts.
                starts = u'' if start == 0 else str(start)
                ends = u':%d' % (end+step)
                steps = u'' if step == 1 else (':%d' % step)
                return u's[%s%s%s]' % (starts, ends, steps)

            step = None
            start = '(Never used)'  # Quelch pyflakes warnings - start will be
                                    # set as soon as step is set
            # Walk consecutive index pairs, coalescing +1/-1 runs into slices.
            for i, prev in zip(idxs[1:], idxs[:-1]):
                if step is not None:
                    # Inside a run: extend it, or flush it when it breaks.
                    if i - prev == step:
                        continue
                    yield _genslice(start, prev, step)
                    step = None
                    continue
                if i - prev in [-1, 1]:
                    # A new consecutive run begins at prev.
                    step = i - prev
                    start = prev
                    continue
                else:
                    # Isolated index.
                    yield u's[%d]' % prev
            # Flush the trailing element or run.
            # NOTE(review): `i` is unbound if idxs has fewer than 2 entries -
            # presumably signatures are always longer; confirm against callers.
            if step is None:
                yield u's[%d]' % i
            else:
                yield _genslice(start, i, step)

        cache_res = func(map(compat_chr, range(slen)))
        cache_spec = [ord(c) for c in cache_res]
        expr_code = u' + '.join(gen_sig_code(cache_spec))
        code = u'if len(s) == %d:\n    return %s\n' % (slen, expr_code)
        self.to_screen(u'Extracted signature function:\n' + code)
520
521     def _parse_sig_js(self, jscode):
522         funcname = self._search_regex(
523             r'signature=([a-zA-Z]+)', jscode,
524             u'Initial JS player signature function name')
525
526         functions = {}
527
528         def argidx(varname):
529             return string.lowercase.index(varname)
530
531         def interpret_statement(stmt, local_vars, allow_recursion=20):
532             if allow_recursion < 0:
533                 raise ExtractorError(u'Recursion limit reached')
534
535             if stmt.startswith(u'var '):
536                 stmt = stmt[len(u'var '):]
537             ass_m = re.match(r'^(?P<out>[a-z]+)(?:\[(?P<index>[^\]]+)\])?' +
538                              r'=(?P<expr>.*)$', stmt)
539             if ass_m:
540                 if ass_m.groupdict().get('index'):
541                     def assign(val):
542                         lvar = local_vars[ass_m.group('out')]
543                         idx = interpret_expression(ass_m.group('index'),
544                                                    local_vars, allow_recursion)
545                         assert isinstance(idx, int)
546                         lvar[idx] = val
547                         return val
548                     expr = ass_m.group('expr')
549                 else:
550                     def assign(val):
551                         local_vars[ass_m.group('out')] = val
552                         return val
553                     expr = ass_m.group('expr')
554             elif stmt.startswith(u'return '):
555                 assign = lambda v: v
556                 expr = stmt[len(u'return '):]
557             else:
558                 raise ExtractorError(
559                     u'Cannot determine left side of statement in %r' % stmt)
560
561             v = interpret_expression(expr, local_vars, allow_recursion)
562             return assign(v)
563
564         def interpret_expression(expr, local_vars, allow_recursion):
565             if expr.isdigit():
566                 return int(expr)
567
568             if expr.isalpha():
569                 return local_vars[expr]
570
571             m = re.match(r'^(?P<in>[a-z]+)\.(?P<member>.*)$', expr)
572             if m:
573                 member = m.group('member')
574                 val = local_vars[m.group('in')]
575                 if member == 'split("")':
576                     return list(val)
577                 if member == 'join("")':
578                     return u''.join(val)
579                 if member == 'length':
580                     return len(val)
581                 if member == 'reverse()':
582                     return val[::-1]
583                 slice_m = re.match(r'slice\((?P<idx>.*)\)', member)
584                 if slice_m:
585                     idx = interpret_expression(
586                         slice_m.group('idx'), local_vars, allow_recursion-1)
587                     return val[idx:]
588
589             m = re.match(
590                 r'^(?P<in>[a-z]+)\[(?P<idx>.+)\]$', expr)
591             if m:
592                 val = local_vars[m.group('in')]
593                 idx = interpret_expression(m.group('idx'), local_vars,
594                                            allow_recursion-1)
595                 return val[idx]
596
597             m = re.match(r'^(?P<a>.+?)(?P<op>[%])(?P<b>.+?)$', expr)
598             if m:
599                 a = interpret_expression(m.group('a'),
600                                          local_vars, allow_recursion)
601                 b = interpret_expression(m.group('b'),
602                                          local_vars, allow_recursion)
603                 return a % b
604
605             m = re.match(
606                 r'^(?P<func>[a-zA-Z]+)\((?P<args>[a-z0-9,]+)\)$', expr)
607             if m:
608                 fname = m.group('func')
609                 if fname not in functions:
610                     functions[fname] = extract_function(fname)
611                 argvals = [int(v) if v.isdigit() else local_vars[v]
612                            for v in m.group('args').split(',')]
613                 return functions[fname](argvals)
614             raise ExtractorError(u'Unsupported JS expression %r' % expr)
615
616         def extract_function(funcname):
617             func_m = re.search(
618                 r'function ' + re.escape(funcname) +
619                 r'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}',
620                 jscode)
621             argnames = func_m.group('args').split(',')
622
623             def resf(args):
624                 local_vars = dict(zip(argnames, args))
625                 for stmt in func_m.group('code').split(';'):
626                     res = interpret_statement(stmt, local_vars)
627                 return res
628             return resf
629
630         initial_function = extract_function(funcname)
631         return lambda s: initial_function([s])
632
633     def _parse_sig_swf(self, file_contents):
634         if file_contents[1:3] != b'WS':
635             raise ExtractorError(
636                 u'Not an SWF file; header is %r' % file_contents[:3])
637         if file_contents[:1] == b'C':
638             content = zlib.decompress(file_contents[8:])
639         else:
640             raise NotImplementedError(u'Unsupported compression format %r' %
641                                       file_contents[:1])
642
643         def extract_tags(content):
644             pos = 0
645             while pos < len(content):
646                 header16 = struct.unpack('<H', content[pos:pos+2])[0]
647                 pos += 2
648                 tag_code = header16 >> 6
649                 tag_len = header16 & 0x3f
650                 if tag_len == 0x3f:
651                     tag_len = struct.unpack('<I', content[pos:pos+4])[0]
652                     pos += 4
653                 assert pos+tag_len <= len(content)
654                 yield (tag_code, content[pos:pos+tag_len])
655                 pos += tag_len
656
657         code_tag = next(tag
658                         for tag_code, tag in extract_tags(content)
659                         if tag_code == 82)
660         p = code_tag.index(b'\0', 4) + 1
661         code_reader = io.BytesIO(code_tag[p:])
662
663         # Parse ABC (AVM2 ByteCode)
664         def read_int(reader=None):
665             if reader is None:
666                 reader = code_reader
667             res = 0
668             shift = 0
669             for _ in range(5):
670                 buf = reader.read(1)
671                 assert len(buf) == 1
672                 b = struct.unpack('<B', buf)[0]
673                 res = res | ((b & 0x7f) << shift)
674                 if b & 0x80 == 0:
675                     break
676                 shift += 7
677             return res
678
679         def u30(reader=None):
680             res = read_int(reader)
681             assert res & 0xf0000000 == 0
682             return res
683         u32 = read_int
684
685         def s32(reader=None):
686             v = read_int(reader)
687             if v & 0x80000000 != 0:
688                 v = - ((v ^ 0xffffffff) + 1)
689             return v
690
691         def read_string(reader=None):
692             if reader is None:
693                 reader = code_reader
694             slen = u30(reader)
695             resb = reader.read(slen)
696             assert len(resb) == slen
697             return resb.decode('utf-8')
698
699         def read_bytes(count, reader=None):
700             if reader is None:
701                 reader = code_reader
702             resb = reader.read(count)
703             assert len(resb) == count
704             return resb
705
706         def read_byte(reader=None):
707             resb = read_bytes(1, reader=reader)
708             res = struct.unpack('<B', resb)[0]
709             return res
710
711         # minor_version + major_version
712         read_bytes(2 + 2)
713
714         # Constant pool
715         int_count = u30()
716         for _c in range(1, int_count):
717             s32()
718         uint_count = u30()
719         for _c in range(1, uint_count):
720             u32()
721         double_count = u30()
722         read_bytes((double_count-1) * 8)
723         string_count = u30()
724         constant_strings = [u'']
725         for _c in range(1, string_count):
726             s = read_string()
727             constant_strings.append(s)
728         namespace_count = u30()
729         for _c in range(1, namespace_count):
730             read_bytes(1)  # kind
731             u30()  # name
732         ns_set_count = u30()
733         for _c in range(1, ns_set_count):
734             count = u30()
735             for _c2 in range(count):
736                 u30()
737         multiname_count = u30()
738         MULTINAME_SIZES = {
739             0x07: 2,  # QName
740             0x0d: 2,  # QNameA
741             0x0f: 1,  # RTQName
742             0x10: 1,  # RTQNameA
743             0x11: 0,  # RTQNameL
744             0x12: 0,  # RTQNameLA
745             0x09: 2,  # Multiname
746             0x0e: 2,  # MultinameA
747             0x1b: 1,  # MultinameL
748             0x1c: 1,  # MultinameLA
749         }
750         multinames = [u'']
751         for _c in range(1, multiname_count):
752             kind = u30()
753             assert kind in MULTINAME_SIZES, u'Invalid multiname kind %r' % kind
754             if kind == 0x07:
755                 u30()  # namespace_idx
756                 name_idx = u30()
757                 multinames.append(constant_strings[name_idx])
758             else:
759                 multinames.append('[MULTINAME kind: %d]' % kind)
760                 for _c2 in range(MULTINAME_SIZES[kind]):
761                     u30()
762
763         # Methods
764         method_count = u30()
765         MethodInfo = collections.namedtuple(
766             'MethodInfo',
767             ['NEED_ARGUMENTS', 'NEED_REST'])
768         method_infos = []
769         for method_id in range(method_count):
770             param_count = u30()
771             u30()  # return type
772             for _ in range(param_count):
773                 u30()  # param type
774             u30()  # name index (always 0 for youtube)
775             flags = read_byte()
776             if flags & 0x08 != 0:
777                 # Options present
778                 option_count = u30()
779                 for c in range(option_count):
780                     u30()  # val
781                     read_bytes(1)  # kind
782             if flags & 0x80 != 0:
783                 # Param names present
784                 for _ in range(param_count):
785                     u30()  # param name
786             mi = MethodInfo(flags & 0x01 != 0, flags & 0x04 != 0)
787             method_infos.append(mi)
788
789         # Metadata
790         metadata_count = u30()
791         for _c in range(metadata_count):
792             u30()  # name
793             item_count = u30()
794             for _c2 in range(item_count):
795                 u30()  # key
796                 u30()  # value
797
798         def parse_traits_info():
799             trait_name_idx = u30()
800             kind_full = read_byte()
801             kind = kind_full & 0x0f
802             attrs = kind_full >> 4
803             methods = {}
804             if kind in [0x00, 0x06]:  # Slot or Const
805                 u30()  # Slot id
806                 u30()  # type_name_idx
807                 vindex = u30()
808                 if vindex != 0:
809                     read_byte()  # vkind
810             elif kind in [0x01, 0x02, 0x03]:  # Method / Getter / Setter
811                 u30()  # disp_id
812                 method_idx = u30()
813                 methods[multinames[trait_name_idx]] = method_idx
814             elif kind == 0x04:  # Class
815                 u30()  # slot_id
816                 u30()  # classi
817             elif kind == 0x05:  # Function
818                 u30()  # slot_id
819                 function_idx = u30()
820                 methods[function_idx] = multinames[trait_name_idx]
821             else:
822                 raise ExtractorError(u'Unsupported trait kind %d' % kind)
823
824             if attrs & 0x4 != 0:  # Metadata present
825                 metadata_count = u30()
826                 for _c3 in range(metadata_count):
827                     u30()  # metadata index
828
829             return methods
830
831         # Classes
832         TARGET_CLASSNAME = u'SignatureDecipher'
833         searched_idx = multinames.index(TARGET_CLASSNAME)
834         searched_class_id = None
835         class_count = u30()
836         for class_id in range(class_count):
837             name_idx = u30()
838             if name_idx == searched_idx:
839                 # We found the class we're looking for!
840                 searched_class_id = class_id
841             u30()  # super_name idx
842             flags = read_byte()
843             if flags & 0x08 != 0:  # Protected namespace is present
844                 u30()  # protected_ns_idx
845             intrf_count = u30()
846             for _c2 in range(intrf_count):
847                 u30()
848             u30()  # iinit
849             trait_count = u30()
850             for _c2 in range(trait_count):
851                 parse_traits_info()
852
853         if searched_class_id is None:
854             raise ExtractorError(u'Target class %r not found' %
855                                  TARGET_CLASSNAME)
856
857         method_names = {}
858         method_idxs = {}
859         for class_id in range(class_count):
860             u30()  # cinit
861             trait_count = u30()
862             for _c2 in range(trait_count):
863                 trait_methods = parse_traits_info()
864                 if class_id == searched_class_id:
865                     method_names.update(trait_methods.items())
866                     method_idxs.update(dict(
867                         (idx, name)
868                         for name, idx in trait_methods.items()))
869
870         # Scripts
871         script_count = u30()
872         for _c in range(script_count):
873             u30()  # init
874             trait_count = u30()
875             for _c2 in range(trait_count):
876                 parse_traits_info()
877
878         # Method bodies
879         method_body_count = u30()
880         Method = collections.namedtuple('Method', ['code', 'local_count'])
881         methods = {}
882         for _c in range(method_body_count):
883             method_idx = u30()
884             u30()  # max_stack
885             local_count = u30()
886             u30()  # init_scope_depth
887             u30()  # max_scope_depth
888             code_length = u30()
889             code = read_bytes(code_length)
890             if method_idx in method_idxs:
891                 m = Method(code, local_count)
892                 methods[method_idxs[method_idx]] = m
893             exception_count = u30()
894             for _c2 in range(exception_count):
895                 u30()  # from
896                 u30()  # to
897                 u30()  # target
898                 u30()  # exc_type
899                 u30()  # var_name
900             trait_count = u30()
901             for _c2 in range(trait_count):
902                 parse_traits_info()
903
904         assert p + code_reader.tell() == len(code_tag)
905         assert len(methods) == len(method_idxs)
906
907         method_pyfunctions = {}
908
909         def extract_function(func_name):
910             if func_name in method_pyfunctions:
911                 return method_pyfunctions[func_name]
912             if func_name not in methods:
913                 raise ExtractorError(u'Cannot find function %r' % func_name)
914             m = methods[func_name]
915
916             def resfunc(args):
917                 registers = ['(this)'] + list(args) + [None] * m.local_count
918                 stack = []
919                 coder = io.BytesIO(m.code)
920                 while True:
921                     opcode = struct.unpack('!B', coder.read(1))[0]
922                     if opcode == 36:  # pushbyte
923                         v = struct.unpack('!B', coder.read(1))[0]
924                         stack.append(v)
925                     elif opcode == 44:  # pushstring
926                         idx = u30(coder)
927                         stack.append(constant_strings[idx])
928                     elif opcode == 48:  # pushscope
929                         # We don't implement the scope register, so we'll just
930                         # ignore the popped value
931                         stack.pop()
932                     elif opcode == 70:  # callproperty
933                         index = u30(coder)
934                         mname = multinames[index]
935                         arg_count = u30(coder)
936                         args = list(reversed(
937                             [stack.pop() for _ in range(arg_count)]))
938                         obj = stack.pop()
939                         if mname == u'split':
940                             assert len(args) == 1
941                             assert isinstance(args[0], compat_str)
942                             assert isinstance(obj, compat_str)
943                             if args[0] == u'':
944                                 res = list(obj)
945                             else:
946                                 res = obj.split(args[0])
947                             stack.append(res)
948                         elif mname == u'slice':
949                             assert len(args) == 1
950                             assert isinstance(args[0], int)
951                             assert isinstance(obj, list)
952                             res = obj[args[0]:]
953                             stack.append(res)
954                         elif mname == u'join':
955                             assert len(args) == 1
956                             assert isinstance(args[0], compat_str)
957                             assert isinstance(obj, list)
958                             res = args[0].join(obj)
959                             stack.append(res)
960                         elif mname in method_pyfunctions:
961                             stack.append(method_pyfunctions[mname](args))
962                         else:
963                             raise NotImplementedError(
964                                 u'Unsupported property %r on %r'
965                                 % (mname, obj))
966                     elif opcode == 72:  # returnvalue
967                         res = stack.pop()
968                         return res
969                     elif opcode == 79:  # callpropvoid
970                         index = u30(coder)
971                         mname = multinames[index]
972                         arg_count = u30(coder)
973                         args = list(reversed(
974                             [stack.pop() for _ in range(arg_count)]))
975                         obj = stack.pop()
976                         if mname == u'reverse':
977                             assert isinstance(obj, list)
978                             obj.reverse()
979                         else:
980                             raise NotImplementedError(
981                                 u'Unsupported (void) property %r on %r'
982                                 % (mname, obj))
983                     elif opcode == 93:  # findpropstrict
984                         index = u30(coder)
985                         mname = multinames[index]
986                         res = extract_function(mname)
987                         stack.append(res)
988                     elif opcode == 97:  # setproperty
989                         index = u30(coder)
990                         value = stack.pop()
991                         idx = stack.pop()
992                         obj = stack.pop()
993                         assert isinstance(obj, list)
994                         assert isinstance(idx, int)
995                         obj[idx] = value
996                     elif opcode == 98:  # getlocal
997                         index = u30(coder)
998                         stack.append(registers[index])
999                     elif opcode == 99:  # setlocal
1000                         index = u30(coder)
1001                         value = stack.pop()
1002                         registers[index] = value
1003                     elif opcode == 102:  # getproperty
1004                         index = u30(coder)
1005                         pname = multinames[index]
1006                         if pname == u'length':
1007                             obj = stack.pop()
1008                             assert isinstance(obj, list)
1009                             stack.append(len(obj))
1010                         else:  # Assume attribute access
1011                             idx = stack.pop()
1012                             assert isinstance(idx, int)
1013                             obj = stack.pop()
1014                             assert isinstance(obj, list)
1015                             stack.append(obj[idx])
1016                     elif opcode == 128:  # coerce
1017                         u30(coder)
1018                     elif opcode == 133:  # coerce_s
1019                         assert isinstance(stack[-1], (type(None), compat_str))
1020                     elif opcode == 164:  # modulo
1021                         value2 = stack.pop()
1022                         value1 = stack.pop()
1023                         res = value1 % value2
1024                         stack.append(res)
1025                     elif opcode == 208:  # getlocal_0
1026                         stack.append(registers[0])
1027                     elif opcode == 209:  # getlocal_1
1028                         stack.append(registers[1])
1029                     elif opcode == 210:  # getlocal_2
1030                         stack.append(registers[2])
1031                     elif opcode == 211:  # getlocal_3
1032                         stack.append(registers[3])
1033                     elif opcode == 214:  # setlocal_2
1034                         registers[2] = stack.pop()
1035                     elif opcode == 215:  # setlocal_3
1036                         registers[3] = stack.pop()
1037                     else:
1038                         raise NotImplementedError(
1039                             u'Unsupported opcode %d' % opcode)
1040
1041             method_pyfunctions[func_name] = resfunc
1042             return resfunc
1043
1044         initial_function = extract_function(u'decipher')
1045         return lambda s: initial_function([s])
1046
1047     def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
1048         """Turn the encrypted s field into a working signature"""
1049
1050         if player_url is not None:
1051             try:
1052                 if player_url not in self._player_cache:
1053                     func = self._extract_signature_function(
1054                         video_id, player_url, len(s)
1055                     )
1056                     self._player_cache[player_url] = func
1057                 func = self._player_cache[player_url]
1058                 if self._downloader.params.get('youtube_print_sig_code'):
1059                     self._print_sig_code(func, len(s))
1060                 return func(s)
1061             except Exception:
1062                 tb = traceback.format_exc()
1063                 self._downloader.report_warning(
1064                     u'Automatic signature extraction failed: ' + tb)
1065
1066             self._downloader.report_warning(
1067                 u'Warning: Falling back to static signature algorithm')
1068         return self._static_decrypt_signature(
1069             s, video_id, player_url, age_gate)
1070
1071     def _static_decrypt_signature(self, s, video_id, player_url, age_gate):
1072         if age_gate:
1073             # The videos with age protection use another player, so the
1074             # algorithms can be different.
1075             if len(s) == 86:
1076                 return s[2:63] + s[82] + s[64:82] + s[63]
1077
1078         if len(s) == 93:
1079             return s[86:29:-1] + s[88] + s[28:5:-1]
1080         elif len(s) == 92:
1081             return s[25] + s[3:25] + s[0] + s[26:42] + s[79] + s[43:79] + s[91] + s[80:83]
1082         elif len(s) == 91:
1083             return s[84:27:-1] + s[86] + s[26:5:-1]
1084         elif len(s) == 90:
1085             return s[25] + s[3:25] + s[2] + s[26:40] + s[77] + s[41:77] + s[89] + s[78:81]
1086         elif len(s) == 89:
1087             return s[84:78:-1] + s[87] + s[77:60:-1] + s[0] + s[59:3:-1]
1088         elif len(s) == 88:
1089             return s[7:28] + s[87] + s[29:45] + s[55] + s[46:55] + s[2] + s[56:87] + s[28]
1090         elif len(s) == 87:
1091             return s[6:27] + s[4] + s[28:39] + s[27] + s[40:59] + s[2] + s[60:]
1092         elif len(s) == 86:
1093             return s[5:34] + s[0] + s[35:38] + s[3] + s[39:45] + s[38] + s[46:53] + s[73] + s[54:73] + s[85] + s[74:85] + s[53]
1094         elif len(s) == 85:
1095             return s[3:11] + s[0] + s[12:55] + s[84] + s[56:84]
1096         elif len(s) == 84:
1097             return s[81:36:-1] + s[0] + s[35:2:-1]
1098         elif len(s) == 83:
1099             return s[81:64:-1] + s[82] + s[63:52:-1] + s[45] + s[51:45:-1] + s[1] + s[44:1:-1] + s[0]
1100         elif len(s) == 82:
1101             return s[80:73:-1] + s[81] + s[72:54:-1] + s[2] + s[53:43:-1] + s[0] + s[42:2:-1] + s[43] + s[1] + s[54]
1102         elif len(s) == 81:
1103             return s[56] + s[79:56:-1] + s[41] + s[55:41:-1] + s[80] + s[40:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
1104         elif len(s) == 80:
1105             return s[1:19] + s[0] + s[20:68] + s[19] + s[69:80]
1106         elif len(s) == 79:
1107             return s[54] + s[77:54:-1] + s[39] + s[53:39:-1] + s[78] + s[38:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
1108
1109         else:
1110             raise ExtractorError(u'Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s)))
1111
1112     def _get_available_subtitles(self, video_id):
1113         try:
1114             sub_list = self._download_webpage(
1115                 'http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
1116                 video_id, note=False)
1117         except ExtractorError as err:
1118             self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err))
1119             return {}
1120         lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list)
1121
1122         sub_lang_list = {}
1123         for l in lang_list:
1124             lang = l[1]
1125             params = compat_urllib_parse.urlencode({
1126                 'lang': lang,
1127                 'v': video_id,
1128                 'fmt': self._downloader.params.get('subtitlesformat'),
1129             })
1130             url = u'http://www.youtube.com/api/timedtext?' + params
1131             sub_lang_list[lang] = url
1132         if not sub_lang_list:
1133             self._downloader.report_warning(u'video doesn\'t have subtitles')
1134             return {}
1135         return sub_lang_list
1136
    def _get_available_automatic_caption(self, video_id, webpage):
        """We need the webpage for getting the captions url, pass it as an
           argument to speed up the process.

           Returns a {lang_code: caption_url} dict, or {} (with a warning)
           when no automatic captions are available."""
        sub_format = self._downloader.params.get('subtitlesformat')
        self.to_screen(u'%s: Looking for automatic captions' % video_id)
        # The caption service URL and its timestamp are only exposed in the
        # inline ytplayer.config JSON blob of the watch page.
        mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
        err_msg = u'Couldn\'t find automatic captions for %s' % video_id
        if mobj is None:
            self._downloader.report_warning(err_msg)
            return {}
        player_config = json.loads(mobj.group(1))
        try:
            args = player_config[u'args']
            caption_url = args[u'ttsurl']
            timestamp = args[u'timestamp']
            # We get the available subtitles
            list_params = compat_urllib_parse.urlencode({
                'type': 'list',
                'tlangs': 1,
                'asrs': 1,
            })
            list_url = caption_url + '&' + list_params
            list_page = self._download_webpage(list_url, video_id)
            caption_list = xml.etree.ElementTree.fromstring(list_page.encode('utf-8'))
            # The first <track> is the original language; automatic captions
            # exist only when that track is marked as ASR (speech recognition).
            original_lang_node = caption_list.find('track')
            if original_lang_node.attrib.get('kind') != 'asr' :
                self._downloader.report_warning(u'Video doesn\'t have automatic captions')
                return {}
            original_lang = original_lang_node.attrib['lang_code']

            # Build one translated-caption URL per available target language.
            sub_lang_list = {}
            for lang_node in caption_list.findall('target'):
                sub_lang = lang_node.attrib['lang_code']
                params = compat_urllib_parse.urlencode({
                    'lang': original_lang,
                    'tlang': sub_lang,
                    'fmt': sub_format,
                    'ts': timestamp,
                    'kind': 'asr',
                })
                sub_lang_list[sub_lang] = caption_url + '&' + params
            return sub_lang_list
        # An extractor error can be raise by the download process if there are
        # no automatic captions but there are subtitles
        except (KeyError, ExtractorError):
            self._downloader.report_warning(err_msg)
            return {}
1184
1185     def _print_formats(self, formats):
1186         print('Available formats:')
1187         for x in formats:
1188             print('%s\t:\t%s\t[%s]%s' %(x, self._video_extensions.get(x, 'flv'),
1189                                         self._video_dimensions.get(x, '???'),
1190                                         ' ('+self._special_itags[x]+')' if x in self._special_itags else ''))
1191
1192     def _extract_id(self, url):
1193         mobj = re.match(self._VALID_URL, url, re.VERBOSE)
1194         if mobj is None:
1195             raise ExtractorError(u'Invalid URL: %s' % url)
1196         video_id = mobj.group(2)
1197         return video_id
1198
1199     def _get_video_url_list(self, url_map):
1200         """
1201         Transform a dictionary in the format {itag:url} to a list of (itag, url)
1202         with the requested formats.
1203         """
1204         req_format = self._downloader.params.get('format', None)
1205         format_limit = self._downloader.params.get('format_limit', None)
1206         available_formats = self._available_formats_prefer_free if self._downloader.params.get('prefer_free_formats', False) else self._available_formats
1207         if format_limit is not None and format_limit in available_formats:
1208             format_list = available_formats[available_formats.index(format_limit):]
1209         else:
1210             format_list = available_formats
1211         existing_formats = [x for x in format_list if x in url_map]
1212         if len(existing_formats) == 0:
1213             raise ExtractorError(u'no known formats available for video')
1214         if self._downloader.params.get('listformats', None):
1215             self._print_formats(existing_formats)
1216             return
1217         if req_format is None or req_format == 'best':
1218             video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
1219         elif req_format == 'worst':
1220             video_url_list = [(existing_formats[-1], url_map[existing_formats[-1]])] # worst quality
1221         elif req_format in ('-1', 'all'):
1222             video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
1223         else:
1224             # Specific formats. We pick the first in a slash-delimeted sequence.
1225             # Format can be specified as itag or 'mp4' or 'flv' etc. We pick the highest quality
1226             # available in the specified format. For example,
1227             # if '1/2/3/4' is requested and '2' and '4' are available, we pick '2'.
1228             # if '1/mp4/3/4' is requested and '1' and '5' (is a mp4) are available, we pick '1'.
1229             # if '1/mp4/3/4' is requested and '4' and '5' (is a mp4) are available, we pick '5'.
1230             req_formats = req_format.split('/')
1231             video_url_list = None
1232             for rf in req_formats:
1233                 if rf in url_map:
1234                     video_url_list = [(rf, url_map[rf])]
1235                     break
1236                 if rf in self._video_formats_map:
1237                     for srf in self._video_formats_map[rf]:
1238                         if srf in url_map:
1239                             video_url_list = [(srf, url_map[srf])]
1240                             break
1241                     else:
1242                         continue
1243                     break
1244             if video_url_list is None:
1245                 raise ExtractorError(u'requested format not available')
1246         return video_url_list
1247
1248     def _extract_from_m3u8(self, manifest_url, video_id):
1249         url_map = {}
1250         def _get_urls(_manifest):
1251             lines = _manifest.split('\n')
1252             urls = filter(lambda l: l and not l.startswith('#'),
1253                             lines)
1254             return urls
1255         manifest = self._download_webpage(manifest_url, video_id, u'Downloading formats manifest')
1256         formats_urls = _get_urls(manifest)
1257         for format_url in formats_urls:
1258             itag = self._search_regex(r'itag/(\d+?)/', format_url, 'itag')
1259             url_map[itag] = format_url
1260         return url_map
1261
1262     def _real_extract(self, url):
1263         if re.match(r'(?:https?://)?[^/]+/watch\?feature=[a-z_]+$', url):
1264             self._downloader.report_warning(u'Did you forget to quote the URL? Remember that & is a meta-character in most shells, so you want to put the URL in quotes, like  youtube-dl \'http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc\' (or simply  youtube-dl BaW_jenozKc  ).')
1265
1266         # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1267         mobj = re.search(self._NEXT_URL_RE, url)
1268         if mobj:
1269             url = 'https://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/')
1270         video_id = self._extract_id(url)
1271
1272         # Get video webpage
1273         self.report_video_webpage_download(video_id)
1274         url = 'https://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
1275         request = compat_urllib_request.Request(url)
1276         try:
1277             video_webpage_bytes = compat_urllib_request.urlopen(request).read()
1278         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1279             raise ExtractorError(u'Unable to download video webpage: %s' % compat_str(err))
1280
1281         video_webpage = video_webpage_bytes.decode('utf-8', 'ignore')
1282
1283         # Attempt to extract SWF player URL
1284         mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
1285         if mobj is not None:
1286             player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1287         else:
1288             player_url = None
1289
1290         # Get video info
1291         self.report_video_info_webpage_download(video_id)
1292         if re.search(r'player-age-gate-content">', video_webpage) is not None:
1293             self.report_age_confirmation()
1294             age_gate = True
1295             # We simulate the access to the video from www.youtube.com/v/{video_id}
1296             # this can be viewed without login into Youtube
1297             data = compat_urllib_parse.urlencode({'video_id': video_id,
1298                                                   'el': 'embedded',
1299                                                   'gl': 'US',
1300                                                   'hl': 'en',
1301                                                   'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1302                                                   'asv': 3,
1303                                                   'sts':'1588',
1304                                                   })
1305             video_info_url = 'https://www.youtube.com/get_video_info?' + data
1306             video_info_webpage = self._download_webpage(video_info_url, video_id,
1307                                     note=False,
1308                                     errnote='unable to download video info webpage')
1309             video_info = compat_parse_qs(video_info_webpage)
1310         else:
1311             age_gate = False
1312             for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
1313                 video_info_url = ('https://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
1314                         % (video_id, el_type))
1315                 video_info_webpage = self._download_webpage(video_info_url, video_id,
1316                                         note=False,
1317                                         errnote='unable to download video info webpage')
1318                 video_info = compat_parse_qs(video_info_webpage)
1319                 if 'token' in video_info:
1320                     break
1321         if 'token' not in video_info:
1322             if 'reason' in video_info:
1323                 raise ExtractorError(u'YouTube said: %s' % video_info['reason'][0], expected=True)
1324             else:
1325                 raise ExtractorError(u'"token" parameter not in video info for unknown reason')
1326
1327         # Check for "rental" videos
1328         if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
1329             raise ExtractorError(u'"rental" videos not supported')
1330
1331         # Start extracting information
1332         self.report_information_extraction(video_id)
1333
1334         # uploader
1335         if 'author' not in video_info:
1336             raise ExtractorError(u'Unable to extract uploader name')
1337         video_uploader = compat_urllib_parse.unquote_plus(video_info['author'][0])
1338
1339         # uploader_id
1340         video_uploader_id = None
1341         mobj = re.search(r'<link itemprop="url" href="http://www.youtube.com/(?:user|channel)/([^"]+)">', video_webpage)
1342         if mobj is not None:
1343             video_uploader_id = mobj.group(1)
1344         else:
1345             self._downloader.report_warning(u'unable to extract uploader nickname')
1346
1347         # title
1348         if 'title' not in video_info:
1349             raise ExtractorError(u'Unable to extract video title')
1350         video_title = compat_urllib_parse.unquote_plus(video_info['title'][0])
1351
1352         # thumbnail image
1353         # We try first to get a high quality image:
1354         m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
1355                             video_webpage, re.DOTALL)
1356         if m_thumb is not None:
1357             video_thumbnail = m_thumb.group(1)
1358         elif 'thumbnail_url' not in video_info:
1359             self._downloader.report_warning(u'unable to extract video thumbnail')
1360             video_thumbnail = ''
1361         else:   # don't panic if we can't find it
1362             video_thumbnail = compat_urllib_parse.unquote_plus(video_info['thumbnail_url'][0])
1363
1364         # upload date
1365         upload_date = None
1366         mobj = re.search(r'id="eow-date.*?>(.*?)</span>', video_webpage, re.DOTALL)
1367         if mobj is not None:
1368             upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
1369             upload_date = unified_strdate(upload_date)
1370
1371         # description
1372         video_description = get_element_by_id("eow-description", video_webpage)
1373         if video_description:
1374             video_description = clean_html(video_description)
1375         else:
1376             fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
1377             if fd_mobj:
1378                 video_description = unescapeHTML(fd_mobj.group(1))
1379             else:
1380                 video_description = u''
1381
1382         # subtitles
1383         video_subtitles = self.extract_subtitles(video_id, video_webpage)
1384
1385         if self._downloader.params.get('listsubtitles', False):
1386             self._list_available_subtitles(video_id, video_webpage)
1387             return
1388
1389         if 'length_seconds' not in video_info:
1390             self._downloader.report_warning(u'unable to extract video duration')
1391             video_duration = ''
1392         else:
1393             video_duration = compat_urllib_parse.unquote_plus(video_info['length_seconds'][0])
1394
1395         # Decide which formats to download
1396
1397         try:
1398             mobj = re.search(r';ytplayer.config = ({.*?});', video_webpage)
1399             if not mobj:
1400                 raise ValueError('Could not find vevo ID')
1401             info = json.loads(mobj.group(1))
1402             args = info['args']
1403             # Easy way to know if the 's' value is in url_encoded_fmt_stream_map
1404             # this signatures are encrypted
1405             m_s = re.search(r'[&,]s=', args['url_encoded_fmt_stream_map'])
1406             if m_s is not None:
1407                 self.to_screen(u'%s: Encrypted signatures detected.' % video_id)
1408                 video_info['url_encoded_fmt_stream_map'] = [args['url_encoded_fmt_stream_map']]
1409             m_s = re.search(r'[&,]s=', args.get('adaptive_fmts', u''))
1410             if m_s is not None:
1411                 if 'url_encoded_fmt_stream_map' in video_info:
1412                     video_info['url_encoded_fmt_stream_map'][0] += ',' + args['adaptive_fmts']
1413                 else:
1414                     video_info['url_encoded_fmt_stream_map'] = [args['adaptive_fmts']]
1415             elif 'adaptive_fmts' in video_info:
1416                 if 'url_encoded_fmt_stream_map' in video_info:
1417                     video_info['url_encoded_fmt_stream_map'][0] += ',' + video_info['adaptive_fmts'][0]
1418                 else:
1419                     video_info['url_encoded_fmt_stream_map'] = video_info['adaptive_fmts']
1420         except ValueError:
1421             pass
1422
1423         if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1424             self.report_rtmp_download()
1425             video_url_list = [(None, video_info['conn'][0])]
1426         elif 'url_encoded_fmt_stream_map' in video_info and len(video_info['url_encoded_fmt_stream_map']) >= 1:
1427             if 'rtmpe%3Dyes' in video_info['url_encoded_fmt_stream_map'][0]:
1428                 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
1429             url_map = {}
1430             for url_data_str in video_info['url_encoded_fmt_stream_map'][0].split(','):
1431                 url_data = compat_parse_qs(url_data_str)
1432                 if 'itag' in url_data and 'url' in url_data:
1433                     url = url_data['url'][0]
1434                     if 'sig' in url_data:
1435                         url += '&signature=' + url_data['sig'][0]
1436                     elif 's' in url_data:
1437                         encrypted_sig = url_data['s'][0]
1438                         if self._downloader.params.get('verbose'):
1439                             if age_gate:
1440                                 if player_url is None:
1441                                     player_version = 'unknown'
1442                                 else:
1443                                     player_version = self._search_regex(
1444                                         r'-(.+)\.swf$', player_url,
1445                                         u'flash player', fatal=False)
1446                                 player_desc = 'flash player %s' % player_version
1447                             else:
1448                                 player_version = self._search_regex(
1449                                     r'html5player-(.+?)\.js', video_webpage,
1450                                     'html5 player', fatal=False)
1451                                 player_desc = u'html5 player %s' % player_version
1452
1453                             parts_sizes = u'.'.join(compat_str(len(part)) for part in encrypted_sig.split('.'))
1454                             self.to_screen(u'encrypted signature length %d (%s), itag %s, %s' %
1455                                 (len(encrypted_sig), parts_sizes, url_data['itag'][0], player_desc))
1456
1457                         if not age_gate:
1458                             jsplayer_url_json = self._search_regex(
1459                                 r'"assets":.+?"js":\s*("[^"]+")',
1460                                 video_webpage, u'JS player URL')
1461                             player_url = json.loads(jsplayer_url_json)
1462
1463                         signature = self._decrypt_signature(
1464                             encrypted_sig, video_id, player_url, age_gate)
1465                         url += '&signature=' + signature
1466                     if 'ratebypass' not in url:
1467                         url += '&ratebypass=yes'
1468                     url_map[url_data['itag'][0]] = url
1469             video_url_list = self._get_video_url_list(url_map)
1470             if not video_url_list:
1471                 return
1472         elif video_info.get('hlsvp'):
1473             manifest_url = video_info['hlsvp'][0]
1474             url_map = self._extract_from_m3u8(manifest_url, video_id)
1475             video_url_list = self._get_video_url_list(url_map)
1476             if not video_url_list:
1477                 return
1478
1479         else:
1480             raise ExtractorError(u'no conn or url_encoded_fmt_stream_map information found in video info')
1481
1482         results = []
1483         for format_param, video_real_url in video_url_list:
1484             # Extension
1485             video_extension = self._video_extensions.get(format_param, 'flv')
1486
1487             video_format = '{0} - {1}{2}'.format(format_param if format_param else video_extension,
1488                                               self._video_dimensions.get(format_param, '???'),
1489                                               ' ('+self._special_itags[format_param]+')' if format_param in self._special_itags else '')
1490
1491             results.append({
1492                 'id':       video_id,
1493                 'url':      video_real_url,
1494                 'uploader': video_uploader,
1495                 'uploader_id': video_uploader_id,
1496                 'upload_date':  upload_date,
1497                 'title':    video_title,
1498                 'ext':      video_extension,
1499                 'format':   video_format,
1500                 'thumbnail':    video_thumbnail,
1501                 'description':  video_description,
1502                 'player_url':   player_url,
1503                 'subtitles':    video_subtitles,
1504                 'duration':     video_duration
1505             })
1506         return results
1507
class YoutubePlaylistIE(InfoExtractor):
    IE_DESC = u'YouTube.com playlists'
    _VALID_URL = r"""(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        youtube\.com/
                        (?:
                           (?:course|view_play_list|my_playlists|artist|playlist|watch)
                           \? (?:.*?&)*? (?:p|a|list)=
                        |  p/
                        )
                        ((?:PL|EC|UU|FL)?[0-9A-Za-z-_]{10,})
                        .*
                     |
                        ((?:PL|EC|UU|FL)[0-9A-Za-z-_]{10,})
                     )"""
    _TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/playlists/%s?max-results=%i&start-index=%i&v=2&alt=json&safeSearch=none'
    _MAX_RESULTS = 50
    IE_NAME = u'youtube:playlist'

    @classmethod
    def suitable(cls, url):
        """Receives a URL and returns True if suitable for this IE."""
        # _VALID_URL is written in verbose mode, so the default suitable()
        # (which matches without re.VERBOSE) cannot be used.
        return re.match(cls._VALID_URL, url, re.VERBOSE) is not None

    def _real_extract(self, url):
        """Collect every video of the playlist through the gdata API."""
        mobj = re.match(self._VALID_URL, url, re.VERBOSE)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        # The playlist id can be captured by either alternative of _VALID_URL.
        playlist_id = mobj.group(1) or mobj.group(2)

        # Page through the API, remembering each video together with its
        # position so the final list can be put in playlist order.
        indexed_videos = []
        for page_num in itertools.count(1):
            start_index = self._MAX_RESULTS * (page_num - 1) + 1
            if start_index >= 1000:
                # The gdata API refuses start indices beyond this point.
                self._downloader.report_warning(u'Max number of results reached')
                break
            api_url = self._TEMPLATE_URL % (playlist_id, self._MAX_RESULTS, start_index)
            page = self._download_webpage(api_url, playlist_id, u'Downloading page #%s' % page_num)

            try:
                response = json.loads(page)
            except ValueError as err:
                raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))

            if 'feed' not in response:
                raise ExtractorError(u'Got a malformed response from YouTube API')
            playlist_title = response['feed']['title']['$t']
            if 'entry' not in response['feed']:
                # Number of videos is a multiple of self._MAX_RESULTS
                break

            for entry in response['feed']['entry']:
                position = entry['yt$position']['$t']
                if 'media$group' in entry and 'yt$videoid' in entry['media$group']:
                    watch_url = 'https://www.youtube.com/watch?v=' + entry['media$group']['yt$videoid']['$t']
                    indexed_videos.append((position, watch_url))

        video_urls = [vurl for (_, vurl) in sorted(indexed_videos)]
        url_results = [self.url_result(vurl, 'Youtube') for vurl in video_urls]
        return [self.playlist_result(url_results, playlist_id, playlist_title)]
1575
1576
class YoutubeChannelIE(InfoExtractor):
    IE_DESC = u'YouTube.com channels'
    _VALID_URL = r"^(?:https?://)?(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/([0-9A-Za-z_-]+)"
    _TEMPLATE_URL = 'http://www.youtube.com/channel/%s/videos?sort=da&flow=list&view=0&page=%s&gl=US&hl=en'
    _MORE_PAGES_INDICATOR = 'yt-uix-load-more'
    _MORE_PAGES_URL = 'http://www.youtube.com/c4_browse_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s'
    IE_NAME = u'youtube:channel'

    def extract_videos_from_page(self, page):
        """Return the video ids linked from *page*, deduplicated, in order."""
        found_ids = []
        for match in re.finditer(r'href="/watch\?v=([0-9A-Za-z_-]+)&?', page):
            video_id = match.group(1)
            if video_id not in found_ids:
                found_ids.append(video_id)
        return found_ids

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        channel_id = mobj.group(1)

        # The first page is served as plain HTML.
        pagenum = 1
        page = self._download_webpage(self._TEMPLATE_URL % (channel_id, pagenum),
                                      channel_id,
                                      u'Downloading page #%s' % pagenum)
        video_ids = list(self.extract_videos_from_page(page))

        # Any further pages come from the json-based channel_ajax endpoint;
        # keep fetching while the "load more" widget is still present.
        if self._MORE_PAGES_INDICATOR in page:
            for pagenum in itertools.count(1):
                page = self._download_webpage(self._MORE_PAGES_URL % (pagenum, channel_id),
                                              channel_id,
                                              u'Downloading page #%s' % pagenum)

                page = json.loads(page)

                video_ids.extend(self.extract_videos_from_page(page['content_html']))

                if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']:
                    break

        self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))

        urls = ['http://www.youtube.com/watch?v=%s' % id for id in video_ids]
        url_entries = [self.url_result(eurl, 'Youtube') for eurl in urls]
        return [self.playlist_result(url_entries, channel_id)]
1631
1632
class YoutubeUserIE(InfoExtractor):
    IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?)|ytuser:)(?!feed/)([A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
    _GDATA_PAGE_SIZE = 50
    _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'
    IE_NAME = u'youtube:user'

    @classmethod
    def suitable(cls, url):
        # Our regex is very permissive, so defer to every other youtube
        # extractor in this module first; otherwise we would shadow them.
        for name, klass in globals().items():
            if name.endswith('IE') and klass is not cls and klass.suitable(url):
                return False
        return super(YoutubeUserIE, cls).suitable(url)

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        username = mobj.group(1)

        # The YouTube Data API caps each response (currently at 50 videos),
        # so query consecutive pages until one comes back short or empty -
        # that means we got all of them.
        video_ids = []

        for pagenum in itertools.count(0):
            start_index = pagenum * self._GDATA_PAGE_SIZE + 1

            gdata_url = self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index)
            page = self._download_webpage(gdata_url, username,
                                          u'Downloading video ids from %d to %d' % (start_index, start_index + self._GDATA_PAGE_SIZE))

            try:
                response = json.loads(page)
            except ValueError as err:
                raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))
            if 'entry' not in response['feed']:
                # Number of videos is a multiple of self._MAX_RESULTS
                break

            # The video id is the last path component of each entry's id.
            ids_in_page = [entry['id']['$t'].split('/')[-1]
                           for entry in response['feed']['entry']]
            video_ids.extend(ids_in_page)

            # A page that is not "full" must be the last one, so skip the
            # extra query that would come back empty anyway.
            if len(ids_in_page) < self._GDATA_PAGE_SIZE:
                break

        urls = ['http://www.youtube.com/watch?v=%s' % video_id for video_id in video_ids]
        url_results = [self.url_result(rurl, 'Youtube') for rurl in urls]
        return [self.playlist_result(url_results, playlist_title=username)]
1697
class YoutubeSearchIE(SearchInfoExtractor):
    IE_DESC = u'YouTube.com searches'
    _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc'
    _MAX_RESULTS = 1000
    IE_NAME = u'youtube:search'
    _SEARCH_KEY = 'ytsearch'

    def report_download_page(self, query, pagenum):
        """Report attempt to download search page with given number."""
        self._downloader.to_screen(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""

        video_ids = []
        pagenum = 0
        # Start optimistic; the API's totalItems lowers the bound below.
        limit = n

        while 50 * pagenum < limit:
            self.report_download_page(query, pagenum + 1)
            result_url = self._API_URL % (compat_urllib_parse.quote_plus(query), 50 * pagenum + 1)
            request = compat_urllib_request.Request(result_url)
            try:
                data = compat_urllib_request.urlopen(request).read().decode('utf-8')
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                raise ExtractorError(u'Unable to download API page: %s' % compat_str(err))
            api_response = json.loads(data)['data']

            if 'items' not in api_response:
                raise ExtractorError(u'[youtube] No video results')

            video_ids.extend(video['id'] for video in api_response['items'])

            # Never request past what the API says actually exists.
            limit = min(n, api_response['totalItems'])
            pagenum += 1

        # The last page may overshoot the requested count; trim the excess.
        del video_ids[n:]
        videos = [self.url_result('http://www.youtube.com/watch?v=%s' % vid, 'Youtube') for vid in video_ids]
        return self.playlist_result(videos, query)
1739
1740
class YoutubeShowIE(InfoExtractor):
    IE_DESC = u'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://www\.youtube\.com/show/(.*)'
    IE_NAME = u'youtube:show'

    def _real_extract(self, url):
        show_name = re.match(self._VALID_URL, url).group(1)
        webpage = self._download_webpage(url, show_name, u'Downloading show webpage')
        # Each season of the show is published as its own playlist.
        season_paths = re.findall(r'href="(/playlist\?list=.*?)"', webpage)
        self.to_screen(u'%s: Found %s seasons' % (show_name, len(season_paths)))
        return [self.url_result('https://www.youtube.com' + path, 'YoutubePlaylist')
                for path in season_paths]
1754
1755
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Base class for extractors that fetch info from
    http://www.youtube.com/feed_ajax
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
    """
    _LOGIN_REQUIRED = True
    _PAGING_STEP = 30
    # use action_load_personal_feed instead of action_load_system_feed
    _PERSONAL_FEED = False

    @property
    def _FEED_TEMPLATE(self):
        """URL template for the feed, with a %s placeholder for the paging offset."""
        action = 'action_load_personal_feed' if self._PERSONAL_FEED else 'action_load_system_feed'
        return 'http://www.youtube.com/feed_ajax?%s=1&feed_name=%s&paging=%%s' % (action, self._FEED_NAME)

    @property
    def IE_NAME(self):
        return u'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        self._login()

    def _real_extract(self, url):
        feed_entries = []
        # The step argument is available only in 2.7 or higher
        for page_index in itertools.count(0):
            paging = page_index * self._PAGING_STEP
            info = json.loads(self._download_webpage(
                self._FEED_TEMPLATE % paging,
                u'%s feed' % self._FEED_NAME,
                u'Downloading page %s' % page_index))
            matches = re.finditer(r'"/watch\?v=(.*?)["&]', info['feed_html'])
            # orderedSet keeps the first occurrence of each id.
            video_ids = orderedSet(m.group(1) for m in matches)
            feed_entries.extend(self.url_result(vid, 'Youtube') for vid in video_ids)
            if info['paging'] is None:
                break
        return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE)
1797
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the logged-in user's subscriptions feed."""
    # Fixed missing space before "(requires" to match the sibling feed IEs'
    # user-facing descriptions.
    IE_DESC = u'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = u'Youtube Subscriptions'
1803
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the logged-in user's recommended videos."""
    IE_DESC = u'YouTube.com recommended videos, "ytrec" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?'
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = u'Youtube Recommended videos'
1809
class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the logged-in user's "Watch Later" list."""
    IE_DESC = u'Youtube watch later list, "ytwatchlater" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/watch_later|:ytwatchlater'
    _FEED_NAME = 'watch_later'
    _PLAYLIST_TITLE = u'Youtube Watch Later'
    # Pages in steps of 100 instead of the base class default of 30.
    _PAGING_STEP = 100
    # This feed is per-user, so use action_load_personal_feed.
    _PERSONAL_FEED = True
1817
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Extractor for the logged-in user's favourite videos."""
    IE_NAME = u'youtube:favorites'
    IE_DESC = u'YouTube.com favourite videos, "ytfav" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
    _LOGIN_REQUIRED = True

    def _real_extract(self, url):
        # The favourites page embeds the id of the playlist that backs it;
        # hand that playlist over to YoutubePlaylistIE for the actual work.
        page = self._download_webpage('https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        favourites_list_id = self._search_regex(r'list=(.+?)["&]', page, u'favourites playlist id')
        return self.url_result(favourites_list_id, 'YoutubePlaylist')