[youtube] Remove _decrypt_signature_age_gate
[youtube-dl] / youtube_dl / extractor / youtube.py
1 # coding: utf-8
2
3 import collections
4 import errno
5 import io
6 import itertools
7 import json
8 import os.path
9 import re
10 import socket
11 import string
12 import struct
13 import traceback
14 import xml.etree.ElementTree
15 import zlib
16
17 from .common import InfoExtractor, SearchInfoExtractor
18 from .subtitles import SubtitlesInfoExtractor
19 from ..utils import (
20     compat_chr,
21     compat_http_client,
22     compat_parse_qs,
23     compat_urllib_error,
24     compat_urllib_parse,
25     compat_urllib_request,
26     compat_str,
27
28     clean_html,
29     get_element_by_id,
30     ExtractorError,
31     unescapeHTML,
32     unified_strdate,
33     orderedSet,
34     write_json_file,
35 )
36
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _LANG_URL = r'https://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
    _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    def report_lang(self):
        """Report attempt to set language."""
        self.to_screen(u'Setting language')

    def _set_language(self):
        """Switch YouTube to English.

        Returns True on success; on network errors emits a warning and
        returns False.
        """
        request = compat_urllib_request.Request(self._LANG_URL)
        try:
            self.report_lang()
            compat_urllib_request.urlopen(request).read()
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self._downloader.report_warning(u'unable to set language: %s' % compat_str(err))
            return False
        return True

    def _login(self):
        """Log into YouTube with the configured account.

        Returns True on success and False otherwise.  Raises
        ExtractorError when no credentials are available but
        _LOGIN_REQUIRED is set.
        """
        (username, password) = self._get_login_info()
        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED:
                raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            return False

        request = compat_urllib_request.Request(self._LOGIN_URL)
        try:
            login_page = compat_urllib_request.urlopen(request).read().decode('utf-8')
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self._downloader.report_warning(u'unable to fetch login page: %s' % compat_str(err))
            return False

        # Hidden anti-forgery tokens that must be echoed back with the form.
        # Fall back to u'' instead of None: a None value crashed below with
        # AttributeError on v.encode('utf-8').
        match = re.search(r'<input.+?name="GALX".+?value="(.+?)"', login_page, re.DOTALL)
        galx = match.group(1) if match else u''
        match = re.search(r'<input.+?name="dsh".+?value="(.+?)"', login_page, re.DOTALL)
        dsh = match.group(1) if match else u''

        # Log in
        login_form_strs = {
                u'continue': u'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
                u'Email': username,
                u'GALX': galx,
                u'Passwd': password,
                u'PersistentCookie': u'yes',
                u'_utf8': u'霱',
                u'bgresponse': u'js_disabled',
                u'checkConnection': u'',
                u'checkedDomains': u'youtube',
                u'dnConn': u'',
                u'dsh': dsh,
                u'pstMsg': u'0',
                u'rmShown': u'1',
                u'secTok': u'',
                u'signIn': u'Sign in',
                u'timeStmp': u'',
                u'service': u'youtube',
                u'uilel': u'3',
                u'hl': u'en_US',
        }
        # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
        # chokes on unicode
        login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k,v in login_form_strs.items())
        login_data = compat_urllib_parse.urlencode(login_form).encode('ascii')
        request = compat_urllib_request.Request(self._LOGIN_URL, login_data)
        try:
            self.report_login()
            login_results = compat_urllib_request.urlopen(request).read().decode('utf-8')
            # Being served the login form again means the credentials were rejected.
            if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
                self._downloader.report_warning(u'unable to log in: bad username or password')
                return False
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self._downloader.report_warning(u'unable to log in: %s' % compat_str(err))
            return False
        return True

    def _confirm_age(self):
        """Submit the age verification form.

        Raises ExtractorError on network failure; returns True otherwise.
        """
        age_form = {
            'next_url': '/',
            'action_confirm': 'Confirm',
        }
        # Encode the POST body to bytes, consistent with login_data above
        # (required on Python 3, harmless on Python 2).
        request = compat_urllib_request.Request(
            self._AGE_URL, compat_urllib_parse.urlencode(age_form).encode('ascii'))
        try:
            self.report_age_confirmation()
            compat_urllib_request.urlopen(request).read().decode('utf-8')
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            raise ExtractorError(u'Unable to confirm age: %s' % compat_str(err))
        return True

    def _real_initialize(self):
        """Set language, log in and confirm age before any extraction."""
        if self._downloader is None:
            return
        if not self._set_language():
            return
        if not self._login():
            return
        self._confirm_age()
143
144
class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
    """Information extractor for individual youtube.com videos."""
    IE_DESC = u'YouTube.com'
    # Verbose pattern (callers pass re.VERBOSE); capture group 1 is the URL
    # prefix (if any) and capture group 2 the 11-character video ID — the
    # trailing (?(1)...) conditional only matches extra text when group 1 did.
    _VALID_URL = r"""^
                     (
                         (?:https?://)?                                       # http(s):// (optional)
                         (?:(?:(?:(?:\w+\.)?youtube(?:-nocookie)?\.com/|
                            tube\.majestyc\.net/|
                            youtube\.googleapis\.com/)                        # the various hostnames, with wildcard subdomains
                         (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
                         (?:                                                  # the various things that can precede the ID:
                             (?:(?:v|embed|e)/)                               # v/ or embed/ or e/
                             |(?:                                             # or the v= param in all its forms
                                 (?:(?:watch|movie)(?:_popup)?(?:\.php)?)?    # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
                                 (?:\?|\#!?)                                  # the params delimiter ? or # or #!
                                 (?:.*?&)?                                    # any other preceding param (like /?s=tuff&v=xxxx)
                                 v=
                             )
                         ))
                         |youtu\.be/                                          # just youtu.be/xxxx
                         )
                     )?                                                       # all until now is optional -> you can pass the naked ID
                     ([0-9A-Za-z_-]{11})                                      # here is it! the YouTube video ID
                     (?(1).+)?                                                # if we found the ID, everything can follow
                     $"""
    # Extracts the target of a verify_age redirect.
    _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
    # Itags listed in order of quality (best first)
    _available_formats = ['38', '37', '46', '22', '45', '35', '44', '34', '18', '43', '6', '5', '36', '17', '13',
                          # Apple HTTP Live Streaming
                          '96', '95', '94', '93', '92', '132', '151',
                          # 3D
                          '85', '84', '102', '83', '101', '82', '100',
                          # Dash video
                          '138', '137', '248', '136', '247', '135', '246',
                          '245', '244', '134', '243', '133', '242', '160',
                          # Dash audio
                          '141', '172', '140', '171', '139',
                          ]
    # Same itags, but ranking free (webm) formats above non-free ones of
    # comparable quality.
    _available_formats_prefer_free = ['38', '46', '37', '45', '22', '44', '35', '43', '34', '18', '6', '5', '36', '17', '13',
                                      # Apple HTTP Live Streaming
                                      '96', '95', '94', '93', '92', '132', '151',
                                      # 3D
                                      '85', '102', '84', '101', '83', '100', '82',
                                      # Dash video
                                      '138', '248', '137', '247', '136', '246', '245',
                                      '244', '135', '243', '134', '242', '133', '160',
                                      # Dash audio
                                      '172', '141', '171', '140', '139',
                                      ]
    # Container extension -> itags available in that container (best first).
    _video_formats_map = {
        'flv': ['35', '34', '6', '5'],
        '3gp': ['36', '17', '13'],
        'mp4': ['38', '37', '22', '18'],
        'webm': ['46', '45', '44', '43'],
    }
    # itag -> file extension
    _video_extensions = {
        '13': '3gp',
        '17': '3gp',
        '18': 'mp4',
        '22': 'mp4',
        '36': '3gp',
        '37': 'mp4',
        '38': 'mp4',
        '43': 'webm',
        '44': 'webm',
        '45': 'webm',
        '46': 'webm',

        # 3d videos
        '82': 'mp4',
        '83': 'mp4',
        '84': 'mp4',
        '85': 'mp4',
        '100': 'webm',
        '101': 'webm',
        '102': 'webm',

        # Apple HTTP Live Streaming
        '92': 'mp4',
        '93': 'mp4',
        '94': 'mp4',
        '95': 'mp4',
        '96': 'mp4',
        '132': 'mp4',
        '151': 'mp4',

        # Dash mp4
        '133': 'mp4',
        '134': 'mp4',
        '135': 'mp4',
        '136': 'mp4',
        '137': 'mp4',
        '138': 'mp4',
        '139': 'mp4',
        '140': 'mp4',
        '141': 'mp4',
        '160': 'mp4',

        # Dash webm
        '171': 'webm',
        '172': 'webm',
        '242': 'webm',
        '243': 'webm',
        '244': 'webm',
        '245': 'webm',
        '246': 'webm',
        '247': 'webm',
        '248': 'webm',
    }
    # itag -> human-readable dimensions ("HxW"), resolution label, or audio
    # bitrate for DASH audio itags.
    _video_dimensions = {
        '5': '240x400',
        '6': '???',
        '13': '???',
        '17': '144x176',
        '18': '360x640',
        '22': '720x1280',
        '34': '360x640',
        '35': '480x854',
        '36': '240x320',
        '37': '1080x1920',
        '38': '3072x4096',
        '43': '360x640',
        '44': '480x854',
        '45': '720x1280',
        '46': '1080x1920',
        '82': '360p',
        '83': '480p',
        '84': '720p',
        '85': '1080p',
        '92': '240p',
        '93': '360p',
        '94': '480p',
        '95': '720p',
        '96': '1080p',
        '100': '360p',
        '101': '480p',
        '102': '720p',
        '132': '240p',
        '151': '72p',
        '133': '240p',
        '134': '360p',
        '135': '480p',
        '136': '720p',
        '137': '1080p',
        '138': '>1080p',
        '139': '48k',
        '140': '128k',
        '141': '256k',
        '160': '192p',
        '171': '128k',
        '172': '256k',
        '242': '240p',
        '243': '360p',
        '244': '480p',
        '245': '480p',
        '246': '480p',
        '247': '720p',
        '248': '1080p',
    }
    # itag -> extra label appended for special stream types (3D / DASH).
    _special_itags = {
        '82': '3D',
        '83': '3D',
        '84': '3D',
        '85': '3D',
        '100': '3D',
        '101': '3D',
        '102': '3D',
        '133': 'DASH Video',
        '134': 'DASH Video',
        '135': 'DASH Video',
        '136': 'DASH Video',
        '137': 'DASH Video',
        '138': 'DASH Video',
        '139': 'DASH Audio',
        '140': 'DASH Audio',
        '141': 'DASH Audio',
        '160': 'DASH Video',
        '171': 'DASH Audio',
        '172': 'DASH Audio',
        '242': 'DASH Video',
        '243': 'DASH Video',
        '244': 'DASH Video',
        '245': 'DASH Video',
        '246': 'DASH Video',
        '247': 'DASH Video',
        '248': 'DASH Video',
    }

    IE_NAME = u'youtube'
    # Test vectors: sample URLs with the metadata expected from extraction.
    _TESTS = [
        {
            u"url":  u"http://www.youtube.com/watch?v=BaW_jenozKc",
            u"file":  u"BaW_jenozKc.mp4",
            u"info_dict": {
                u"title": u"youtube-dl test video \"'/\\ä↭𝕐",
                u"uploader": u"Philipp Hagemeister",
                u"uploader_id": u"phihag",
                u"upload_date": u"20121002",
                u"description": u"test chars:  \"'/\\ä↭𝕐\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de ."
            }
        },
        {
            u"url":  u"http://www.youtube.com/watch?v=1ltcDfZMA3U",
            u"file":  u"1ltcDfZMA3U.flv",
            u"note": u"Test VEVO video (#897)",
            u"info_dict": {
                u"upload_date": u"20070518",
                u"title": u"Maps - It Will Find You",
                u"description": u"Music video by Maps performing It Will Find You.",
                u"uploader": u"MuteUSA",
                u"uploader_id": u"MuteUSA"
            }
        },
        {
            u"url":  u"http://www.youtube.com/watch?v=UxxajLWwzqY",
            u"file":  u"UxxajLWwzqY.mp4",
            u"note": u"Test generic use_cipher_signature video (#897)",
            u"info_dict": {
                u"upload_date": u"20120506",
                u"title": u"Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]",
                u"description": u"md5:3e2666e0a55044490499ea45fe9037b7",
                u"uploader": u"Icona Pop",
                u"uploader_id": u"IconaPop"
            }
        },
        {
            u"url":  u"https://www.youtube.com/watch?v=07FYdnEawAQ",
            u"file":  u"07FYdnEawAQ.mp4",
            u"note": u"Test VEVO video with age protection (#956)",
            u"info_dict": {
                u"upload_date": u"20130703",
                u"title": u"Justin Timberlake - Tunnel Vision (Explicit)",
                u"description": u"md5:64249768eec3bc4276236606ea996373",
                u"uploader": u"justintimberlakeVEVO",
                u"uploader_id": u"justintimberlakeVEVO"
            }
        },
        {
            u'url': u'https://www.youtube.com/watch?v=TGi3HqYrWHE',
            u'file': u'TGi3HqYrWHE.mp4',
            u'note': u'm3u8 video',
            u'info_dict': {
                u'title': u'Triathlon - Men - London 2012 Olympic Games',
                u'description': u'- Men -  TR02 - Triathlon - 07 August 2012 - London 2012 Olympic Games',
                u'uploader': u'olympic',
                u'upload_date': u'20120807',
                u'uploader_id': u'olympic',
            },
            u'params': {
                u'skip_download': True,
            },
        },
    ]
397
398
399     @classmethod
400     def suitable(cls, url):
401         """Receives a URL and returns True if suitable for this IE."""
402         if YoutubePlaylistIE.suitable(url): return False
403         return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
404
405     def __init__(self, *args, **kwargs):
406         super(YoutubeIE, self).__init__(*args, **kwargs)
407         self._player_cache = {}
408
409     def report_video_webpage_download(self, video_id):
410         """Report attempt to download video webpage."""
411         self.to_screen(u'%s: Downloading video webpage' % video_id)
412
413     def report_video_info_webpage_download(self, video_id):
414         """Report attempt to download video info webpage."""
415         self.to_screen(u'%s: Downloading video info webpage' % video_id)
416
417     def report_information_extraction(self, video_id):
418         """Report attempt to extract video information."""
419         self.to_screen(u'%s: Extracting video information' % video_id)
420
421     def report_unavailable_format(self, video_id, format):
422         """Report extracted video URL."""
423         self.to_screen(u'%s: Format %s not available' % (video_id, format))
424
425     def report_rtmp_download(self):
426         """Indicate the download will use the RTMP protocol."""
427         self.to_screen(u'RTMP download detected')
428
429     def _extract_signature_function(self, video_id, player_url, slen):
430         id_m = re.match(r'.*-(?P<id>[a-zA-Z0-9_-]+)\.(?P<ext>[a-z]+)$',
431                         player_url)
432         player_type = id_m.group('ext')
433         player_id = id_m.group('id')
434
435         # Read from filesystem cache
436         func_id = '%s_%s_%d' % (player_type, player_id, slen)
437         assert os.path.basename(func_id) == func_id
438         cache_dir = self._downloader.params.get('cachedir',
439                                                 u'~/.youtube-dl/cache')
440
441         cache_enabled = cache_dir != u'NONE'
442         if cache_enabled:
443             cache_fn = os.path.join(os.path.expanduser(cache_dir),
444                                     u'youtube-sigfuncs',
445                                     func_id + '.json')
446             try:
447                 with io.open(cache_fn, 'r', encoding='utf-8') as cachef:
448                     cache_spec = json.load(cachef)
449                 return lambda s: u''.join(s[i] for i in cache_spec)
450             except IOError:
451                 pass  # No cache available
452
453         if player_type == 'js':
454             code = self._download_webpage(
455                 player_url, video_id,
456                 note=u'Downloading %s player %s' % (player_type, player_id),
457                 errnote=u'Download of %s failed' % player_url)
458             res = self._parse_sig_js(code)
459         elif player_type == 'swf':
460             urlh = self._request_webpage(
461                 player_url, video_id,
462                 note=u'Downloading %s player %s' % (player_type, player_id),
463                 errnote=u'Download of %s failed' % player_url)
464             code = urlh.read()
465             res = self._parse_sig_swf(code)
466         else:
467             assert False, 'Invalid player type %r' % player_type
468
469         if cache_enabled:
470             try:
471                 cache_res = res(map(compat_chr, range(slen)))
472                 cache_spec = [ord(c) for c in cache_res]
473                 try:
474                     os.makedirs(os.path.dirname(cache_fn))
475                 except OSError as ose:
476                     if ose.errno != errno.EEXIST:
477                         raise
478                 write_json_file(cache_spec, cache_fn)
479             except Exception:
480                 tb = traceback.format_exc()
481                 self._downloader.report_warning(
482                     u'Writing cache to %r failed: %s' % (cache_fn, tb))
483
484         return res
485
486     def _print_sig_code(self, func, slen):
487         def gen_sig_code(idxs):
488             def _genslice(start, end, step):
489                 starts = u'' if start == 0 else str(start)
490                 ends = u':%d' % (end+step)
491                 steps = u'' if step == 1 else (':%d' % step)
492                 return u's[%s%s%s]' % (starts, ends, steps)
493
494             step = None
495             start = '(Never used)'  # Quelch pyflakes warnings - start will be
496                                     # set as soon as step is set
497             for i, prev in zip(idxs[1:], idxs[:-1]):
498                 if step is not None:
499                     if i - prev == step:
500                         continue
501                     yield _genslice(start, prev, step)
502                     step = None
503                     continue
504                 if i - prev in [-1, 1]:
505                     step = i - prev
506                     start = prev
507                     continue
508                 else:
509                     yield u's[%d]' % prev
510             if step is None:
511                 yield u's[%d]' % i
512             else:
513                 yield _genslice(start, i, step)
514
515         cache_res = func(map(compat_chr, range(slen)))
516         cache_spec = [ord(c) for c in cache_res]
517         expr_code = u' + '.join(gen_sig_code(cache_spec))
518         code = u'if len(s) == %d:\n    return %s\n' % (slen, expr_code)
519         self.to_screen(u'Extracted signature function:\n' + code)
520
521     def _parse_sig_js(self, jscode):
522         funcname = self._search_regex(
523             r'signature=([a-zA-Z]+)', jscode,
524             u'Initial JS player signature function name')
525
526         functions = {}
527
528         def argidx(varname):
529             return string.lowercase.index(varname)
530
531         def interpret_statement(stmt, local_vars, allow_recursion=20):
532             if allow_recursion < 0:
533                 raise ExtractorError(u'Recursion limit reached')
534
535             if stmt.startswith(u'var '):
536                 stmt = stmt[len(u'var '):]
537             ass_m = re.match(r'^(?P<out>[a-z]+)(?:\[(?P<index>[^\]]+)\])?' +
538                              r'=(?P<expr>.*)$', stmt)
539             if ass_m:
540                 if ass_m.groupdict().get('index'):
541                     def assign(val):
542                         lvar = local_vars[ass_m.group('out')]
543                         idx = interpret_expression(ass_m.group('index'),
544                                                    local_vars, allow_recursion)
545                         assert isinstance(idx, int)
546                         lvar[idx] = val
547                         return val
548                     expr = ass_m.group('expr')
549                 else:
550                     def assign(val):
551                         local_vars[ass_m.group('out')] = val
552                         return val
553                     expr = ass_m.group('expr')
554             elif stmt.startswith(u'return '):
555                 assign = lambda v: v
556                 expr = stmt[len(u'return '):]
557             else:
558                 raise ExtractorError(
559                     u'Cannot determine left side of statement in %r' % stmt)
560
561             v = interpret_expression(expr, local_vars, allow_recursion)
562             return assign(v)
563
564         def interpret_expression(expr, local_vars, allow_recursion):
565             if expr.isdigit():
566                 return int(expr)
567
568             if expr.isalpha():
569                 return local_vars[expr]
570
571             m = re.match(r'^(?P<in>[a-z]+)\.(?P<member>.*)$', expr)
572             if m:
573                 member = m.group('member')
574                 val = local_vars[m.group('in')]
575                 if member == 'split("")':
576                     return list(val)
577                 if member == 'join("")':
578                     return u''.join(val)
579                 if member == 'length':
580                     return len(val)
581                 if member == 'reverse()':
582                     return val[::-1]
583                 slice_m = re.match(r'slice\((?P<idx>.*)\)', member)
584                 if slice_m:
585                     idx = interpret_expression(
586                         slice_m.group('idx'), local_vars, allow_recursion-1)
587                     return val[idx:]
588
589             m = re.match(
590                 r'^(?P<in>[a-z]+)\[(?P<idx>.+)\]$', expr)
591             if m:
592                 val = local_vars[m.group('in')]
593                 idx = interpret_expression(m.group('idx'), local_vars,
594                                            allow_recursion-1)
595                 return val[idx]
596
597             m = re.match(r'^(?P<a>.+?)(?P<op>[%])(?P<b>.+?)$', expr)
598             if m:
599                 a = interpret_expression(m.group('a'),
600                                          local_vars, allow_recursion)
601                 b = interpret_expression(m.group('b'),
602                                          local_vars, allow_recursion)
603                 return a % b
604
605             m = re.match(
606                 r'^(?P<func>[a-zA-Z]+)\((?P<args>[a-z0-9,]+)\)$', expr)
607             if m:
608                 fname = m.group('func')
609                 if fname not in functions:
610                     functions[fname] = extract_function(fname)
611                 argvals = [int(v) if v.isdigit() else local_vars[v]
612                            for v in m.group('args').split(',')]
613                 return functions[fname](argvals)
614             raise ExtractorError(u'Unsupported JS expression %r' % expr)
615
616         def extract_function(funcname):
617             func_m = re.search(
618                 r'function ' + re.escape(funcname) +
619                 r'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}',
620                 jscode)
621             argnames = func_m.group('args').split(',')
622
623             def resf(args):
624                 local_vars = dict(zip(argnames, args))
625                 for stmt in func_m.group('code').split(';'):
626                     res = interpret_statement(stmt, local_vars)
627                 return res
628             return resf
629
630         initial_function = extract_function(funcname)
631         return lambda s: initial_function([s])
632
633     def _parse_sig_swf(self, file_contents):
634         if file_contents[1:3] != b'WS':
635             raise ExtractorError(
636                 u'Not an SWF file; header is %r' % file_contents[:3])
637         if file_contents[:1] == b'C':
638             content = zlib.decompress(file_contents[8:])
639         else:
640             raise NotImplementedError(u'Unsupported compression format %r' %
641                                       file_contents[:1])
642
643         def extract_tags(content):
644             pos = 0
645             while pos < len(content):
646                 header16 = struct.unpack('<H', content[pos:pos+2])[0]
647                 pos += 2
648                 tag_code = header16 >> 6
649                 tag_len = header16 & 0x3f
650                 if tag_len == 0x3f:
651                     tag_len = struct.unpack('<I', content[pos:pos+4])[0]
652                     pos += 4
653                 assert pos+tag_len <= len(content)
654                 yield (tag_code, content[pos:pos+tag_len])
655                 pos += tag_len
656
657         code_tag = next(tag
658                         for tag_code, tag in extract_tags(content)
659                         if tag_code == 82)
660         p = code_tag.index(b'\0', 4) + 1
661         code_reader = io.BytesIO(code_tag[p:])
662
663         # Parse ABC (AVM2 ByteCode)
664         def read_int(reader=None):
665             if reader is None:
666                 reader = code_reader
667             res = 0
668             shift = 0
669             for _ in range(5):
670                 buf = reader.read(1)
671                 assert len(buf) == 1
672                 b = struct.unpack('<B', buf)[0]
673                 res = res | ((b & 0x7f) << shift)
674                 if b & 0x80 == 0:
675                     break
676                 shift += 7
677             return res
678
679         def u30(reader=None):
680             res = read_int(reader)
681             assert res & 0xf0000000 == 0
682             return res
683         u32 = read_int
684
685         def s32(reader=None):
686             v = read_int(reader)
687             if v & 0x80000000 != 0:
688                 v = - ((v ^ 0xffffffff) + 1)
689             return v
690
691         def read_string(reader=None):
692             if reader is None:
693                 reader = code_reader
694             slen = u30(reader)
695             resb = reader.read(slen)
696             assert len(resb) == slen
697             return resb.decode('utf-8')
698
699         def read_bytes(count, reader=None):
700             if reader is None:
701                 reader = code_reader
702             resb = reader.read(count)
703             assert len(resb) == count
704             return resb
705
706         def read_byte(reader=None):
707             resb = read_bytes(1, reader=reader)
708             res = struct.unpack('<B', resb)[0]
709             return res
710
711         # minor_version + major_version
712         read_bytes(2 + 2)
713
714         # Constant pool
715         int_count = u30()
716         for _c in range(1, int_count):
717             s32()
718         uint_count = u30()
719         for _c in range(1, uint_count):
720             u32()
721         double_count = u30()
722         read_bytes((double_count-1) * 8)
723         string_count = u30()
724         constant_strings = [u'']
725         for _c in range(1, string_count):
726             s = read_string()
727             constant_strings.append(s)
728         namespace_count = u30()
729         for _c in range(1, namespace_count):
730             read_bytes(1)  # kind
731             u30()  # name
732         ns_set_count = u30()
733         for _c in range(1, ns_set_count):
734             count = u30()
735             for _c2 in range(count):
736                 u30()
737         multiname_count = u30()
738         MULTINAME_SIZES = {
739             0x07: 2,  # QName
740             0x0d: 2,  # QNameA
741             0x0f: 1,  # RTQName
742             0x10: 1,  # RTQNameA
743             0x11: 0,  # RTQNameL
744             0x12: 0,  # RTQNameLA
745             0x09: 2,  # Multiname
746             0x0e: 2,  # MultinameA
747             0x1b: 1,  # MultinameL
748             0x1c: 1,  # MultinameLA
749         }
750         multinames = [u'']
751         for _c in range(1, multiname_count):
752             kind = u30()
753             assert kind in MULTINAME_SIZES, u'Invalid multiname kind %r' % kind
754             if kind == 0x07:
755                 u30()  # namespace_idx
756                 name_idx = u30()
757                 multinames.append(constant_strings[name_idx])
758             else:
759                 multinames.append('[MULTINAME kind: %d]' % kind)
760                 for _c2 in range(MULTINAME_SIZES[kind]):
761                     u30()
762
763         # Methods
764         method_count = u30()
765         MethodInfo = collections.namedtuple(
766             'MethodInfo',
767             ['NEED_ARGUMENTS', 'NEED_REST'])
768         method_infos = []
769         for method_id in range(method_count):
770             param_count = u30()
771             u30()  # return type
772             for _ in range(param_count):
773                 u30()  # param type
774             u30()  # name index (always 0 for youtube)
775             flags = read_byte()
776             if flags & 0x08 != 0:
777                 # Options present
778                 option_count = u30()
779                 for c in range(option_count):
780                     u30()  # val
781                     read_bytes(1)  # kind
782             if flags & 0x80 != 0:
783                 # Param names present
784                 for _ in range(param_count):
785                     u30()  # param name
786             mi = MethodInfo(flags & 0x01 != 0, flags & 0x04 != 0)
787             method_infos.append(mi)
788
789         # Metadata
790         metadata_count = u30()
791         for _c in range(metadata_count):
792             u30()  # name
793             item_count = u30()
794             for _c2 in range(item_count):
795                 u30()  # key
796                 u30()  # value
797
798         def parse_traits_info():
799             trait_name_idx = u30()
800             kind_full = read_byte()
801             kind = kind_full & 0x0f
802             attrs = kind_full >> 4
803             methods = {}
804             if kind in [0x00, 0x06]:  # Slot or Const
805                 u30()  # Slot id
806                 u30()  # type_name_idx
807                 vindex = u30()
808                 if vindex != 0:
809                     read_byte()  # vkind
810             elif kind in [0x01, 0x02, 0x03]:  # Method / Getter / Setter
811                 u30()  # disp_id
812                 method_idx = u30()
813                 methods[multinames[trait_name_idx]] = method_idx
814             elif kind == 0x04:  # Class
815                 u30()  # slot_id
816                 u30()  # classi
817             elif kind == 0x05:  # Function
818                 u30()  # slot_id
819                 function_idx = u30()
820                 methods[function_idx] = multinames[trait_name_idx]
821             else:
822                 raise ExtractorError(u'Unsupported trait kind %d' % kind)
823
824             if attrs & 0x4 != 0:  # Metadata present
825                 metadata_count = u30()
826                 for _c3 in range(metadata_count):
827                     u30()  # metadata index
828
829             return methods
830
831         # Classes
832         TARGET_CLASSNAME = u'SignatureDecipher'
833         searched_idx = multinames.index(TARGET_CLASSNAME)
834         searched_class_id = None
835         class_count = u30()
836         for class_id in range(class_count):
837             name_idx = u30()
838             if name_idx == searched_idx:
839                 # We found the class we're looking for!
840                 searched_class_id = class_id
841             u30()  # super_name idx
842             flags = read_byte()
843             if flags & 0x08 != 0:  # Protected namespace is present
844                 u30()  # protected_ns_idx
845             intrf_count = u30()
846             for _c2 in range(intrf_count):
847                 u30()
848             u30()  # iinit
849             trait_count = u30()
850             for _c2 in range(trait_count):
851                 parse_traits_info()
852
853         if searched_class_id is None:
854             raise ExtractorError(u'Target class %r not found' %
855                                  TARGET_CLASSNAME)
856
857         method_names = {}
858         method_idxs = {}
859         for class_id in range(class_count):
860             u30()  # cinit
861             trait_count = u30()
862             for _c2 in range(trait_count):
863                 trait_methods = parse_traits_info()
864                 if class_id == searched_class_id:
865                     method_names.update(trait_methods.items())
866                     method_idxs.update(dict(
867                         (idx, name)
868                         for name, idx in trait_methods.items()))
869
870         # Scripts
871         script_count = u30()
872         for _c in range(script_count):
873             u30()  # init
874             trait_count = u30()
875             for _c2 in range(trait_count):
876                 parse_traits_info()
877
878         # Method bodies
879         method_body_count = u30()
880         Method = collections.namedtuple('Method', ['code', 'local_count'])
881         methods = {}
882         for _c in range(method_body_count):
883             method_idx = u30()
884             u30()  # max_stack
885             local_count = u30()
886             u30()  # init_scope_depth
887             u30()  # max_scope_depth
888             code_length = u30()
889             code = read_bytes(code_length)
890             if method_idx in method_idxs:
891                 m = Method(code, local_count)
892                 methods[method_idxs[method_idx]] = m
893             exception_count = u30()
894             for _c2 in range(exception_count):
895                 u30()  # from
896                 u30()  # to
897                 u30()  # target
898                 u30()  # exc_type
899                 u30()  # var_name
900             trait_count = u30()
901             for _c2 in range(trait_count):
902                 parse_traits_info()
903
904         assert p + code_reader.tell() == len(code_tag)
905         assert len(methods) == len(method_idxs)
906
907         method_pyfunctions = {}
908
909         def extract_function(func_name):
910             if func_name in method_pyfunctions:
911                 return method_pyfunctions[func_name]
912             if func_name not in methods:
913                 raise ExtractorError(u'Cannot find function %r' % func_name)
914             m = methods[func_name]
915
916             def resfunc(args):
917                 registers = ['(this)'] + list(args) + [None] * m.local_count
918                 stack = []
919                 coder = io.BytesIO(m.code)
920                 while True:
921                     opcode = struct.unpack('!B', coder.read(1))[0]
922                     if opcode == 36:  # pushbyte
923                         v = struct.unpack('!B', coder.read(1))[0]
924                         stack.append(v)
925                     elif opcode == 44:  # pushstring
926                         idx = u30(coder)
927                         stack.append(constant_strings[idx])
928                     elif opcode == 48:  # pushscope
929                         # We don't implement the scope register, so we'll just
930                         # ignore the popped value
931                         stack.pop()
932                     elif opcode == 70:  # callproperty
933                         index = u30(coder)
934                         mname = multinames[index]
935                         arg_count = u30(coder)
936                         args = list(reversed(
937                             [stack.pop() for _ in range(arg_count)]))
938                         obj = stack.pop()
939                         if mname == u'split':
940                             assert len(args) == 1
941                             assert isinstance(args[0], compat_str)
942                             assert isinstance(obj, compat_str)
943                             if args[0] == u'':
944                                 res = list(obj)
945                             else:
946                                 res = obj.split(args[0])
947                             stack.append(res)
948                         elif mname == u'slice':
949                             assert len(args) == 1
950                             assert isinstance(args[0], int)
951                             assert isinstance(obj, list)
952                             res = obj[args[0]:]
953                             stack.append(res)
954                         elif mname == u'join':
955                             assert len(args) == 1
956                             assert isinstance(args[0], compat_str)
957                             assert isinstance(obj, list)
958                             res = args[0].join(obj)
959                             stack.append(res)
960                         elif mname in method_pyfunctions:
961                             stack.append(method_pyfunctions[mname](args))
962                         else:
963                             raise NotImplementedError(
964                                 u'Unsupported property %r on %r'
965                                 % (mname, obj))
966                     elif opcode == 72:  # returnvalue
967                         res = stack.pop()
968                         return res
969                     elif opcode == 79:  # callpropvoid
970                         index = u30(coder)
971                         mname = multinames[index]
972                         arg_count = u30(coder)
973                         args = list(reversed(
974                             [stack.pop() for _ in range(arg_count)]))
975                         obj = stack.pop()
976                         if mname == u'reverse':
977                             assert isinstance(obj, list)
978                             obj.reverse()
979                         else:
980                             raise NotImplementedError(
981                                 u'Unsupported (void) property %r on %r'
982                                 % (mname, obj))
983                     elif opcode == 93:  # findpropstrict
984                         index = u30(coder)
985                         mname = multinames[index]
986                         res = extract_function(mname)
987                         stack.append(res)
988                     elif opcode == 97:  # setproperty
989                         index = u30(coder)
990                         value = stack.pop()
991                         idx = stack.pop()
992                         obj = stack.pop()
993                         assert isinstance(obj, list)
994                         assert isinstance(idx, int)
995                         obj[idx] = value
996                     elif opcode == 98:  # getlocal
997                         index = u30(coder)
998                         stack.append(registers[index])
999                     elif opcode == 99:  # setlocal
1000                         index = u30(coder)
1001                         value = stack.pop()
1002                         registers[index] = value
1003                     elif opcode == 102:  # getproperty
1004                         index = u30(coder)
1005                         pname = multinames[index]
1006                         if pname == u'length':
1007                             obj = stack.pop()
1008                             assert isinstance(obj, list)
1009                             stack.append(len(obj))
1010                         else:  # Assume attribute access
1011                             idx = stack.pop()
1012                             assert isinstance(idx, int)
1013                             obj = stack.pop()
1014                             assert isinstance(obj, list)
1015                             stack.append(obj[idx])
1016                     elif opcode == 128:  # coerce
1017                         u30(coder)
1018                     elif opcode == 133:  # coerce_s
1019                         assert isinstance(stack[-1], (type(None), compat_str))
1020                     elif opcode == 164:  # modulo
1021                         value2 = stack.pop()
1022                         value1 = stack.pop()
1023                         res = value1 % value2
1024                         stack.append(res)
1025                     elif opcode == 208:  # getlocal_0
1026                         stack.append(registers[0])
1027                     elif opcode == 209:  # getlocal_1
1028                         stack.append(registers[1])
1029                     elif opcode == 210:  # getlocal_2
1030                         stack.append(registers[2])
1031                     elif opcode == 211:  # getlocal_3
1032                         stack.append(registers[3])
1033                     elif opcode == 214:  # setlocal_2
1034                         registers[2] = stack.pop()
1035                     elif opcode == 215:  # setlocal_3
1036                         registers[3] = stack.pop()
1037                     else:
1038                         raise NotImplementedError(
1039                             u'Unsupported opcode %d' % opcode)
1040
1041             method_pyfunctions[func_name] = resfunc
1042             return resfunc
1043
1044         initial_function = extract_function(u'decipher')
1045         return lambda s: initial_function([s])
1046
1047     def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
1048         """Turn the encrypted s field into a working signature"""
1049
1050         if player_url is not None:
1051             try:
1052                 if player_url not in self._player_cache:
1053                     func = self._extract_signature_function(
1054                         video_id, player_url, len(s)
1055                     )
1056                     self._player_cache[player_url] = func
1057                 func = self._player_cache[player_url]
1058                 if self._downloader.params.get('youtube_print_sig_code'):
1059                     self._print_sig_code(func, len(s))
1060                 return func(s)
1061             except Exception:
1062                 tb = traceback.format_exc()
1063                 self._downloader.report_warning(
1064                     u'Automatic signature extraction failed: ' + tb)
1065
1066         self._downloader.report_warning(
1067             u'Warning: Falling back to static signature algorithm')
1068         return self._static_decrypt_signature(
1069             s, video_id, player_url, age_gate)
1070
1071     def _static_decrypt_signature(self, s, video_id, player_url, age_gate):
1072         if age_gate:
1073             # The videos with age protection use another player, so the
1074             # algorithms can be different.
1075             if len(s) == 86:
1076                 return s[2:63] + s[82] + s[64:82] + s[63]
1077
1078         if len(s) == 93:
1079             return s[86:29:-1] + s[88] + s[28:5:-1]
1080         elif len(s) == 92:
1081             return s[25] + s[3:25] + s[0] + s[26:42] + s[79] + s[43:79] + s[91] + s[80:83]
1082         elif len(s) == 91:
1083             return s[84:27:-1] + s[86] + s[26:5:-1]
1084         elif len(s) == 90:
1085             return s[25] + s[3:25] + s[2] + s[26:40] + s[77] + s[41:77] + s[89] + s[78:81]
1086         elif len(s) == 89:
1087             return s[84:78:-1] + s[87] + s[77:60:-1] + s[0] + s[59:3:-1]
1088         elif len(s) == 88:
1089             return s[7:28] + s[87] + s[29:45] + s[55] + s[46:55] + s[2] + s[56:87] + s[28]
1090         elif len(s) == 87:
1091             return s[6:27] + s[4] + s[28:39] + s[27] + s[40:59] + s[2] + s[60:]
1092         elif len(s) == 86:
1093             return s[5:34] + s[0] + s[35:38] + s[3] + s[39:45] + s[38] + s[46:53] + s[73] + s[54:73] + s[85] + s[74:85] + s[53]
1094         elif len(s) == 85:
1095             return s[3:11] + s[0] + s[12:55] + s[84] + s[56:84]
1096         elif len(s) == 84:
1097             return s[81:36:-1] + s[0] + s[35:2:-1]
1098         elif len(s) == 83:
1099             return s[81:64:-1] + s[82] + s[63:52:-1] + s[45] + s[51:45:-1] + s[1] + s[44:1:-1] + s[0]
1100         elif len(s) == 82:
1101             return s[80:73:-1] + s[81] + s[72:54:-1] + s[2] + s[53:43:-1] + s[0] + s[42:2:-1] + s[43] + s[1] + s[54]
1102         elif len(s) == 81:
1103             return s[56] + s[79:56:-1] + s[41] + s[55:41:-1] + s[80] + s[40:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
1104         elif len(s) == 80:
1105             return s[1:19] + s[0] + s[20:68] + s[19] + s[69:80]
1106         elif len(s) == 79:
1107             return s[54] + s[77:54:-1] + s[39] + s[53:39:-1] + s[78] + s[38:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
1108
1109         else:
1110             raise ExtractorError(u'Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s)))
1111
1112     def _get_available_subtitles(self, video_id):
1113         try:
1114             sub_list = self._download_webpage(
1115                 'http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
1116                 video_id, note=False)
1117         except ExtractorError as err:
1118             self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err))
1119             return {}
1120         lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list)
1121
1122         sub_lang_list = {}
1123         for l in lang_list:
1124             lang = l[1]
1125             params = compat_urllib_parse.urlencode({
1126                 'lang': lang,
1127                 'v': video_id,
1128                 'fmt': self._downloader.params.get('subtitlesformat'),
1129             })
1130             url = u'http://www.youtube.com/api/timedtext?' + params
1131             sub_lang_list[lang] = url
1132         if not sub_lang_list:
1133             self._downloader.report_warning(u'video doesn\'t have subtitles')
1134             return {}
1135         return sub_lang_list
1136
1137     def _get_available_automatic_caption(self, video_id, webpage):
1138         """We need the webpage for getting the captions url, pass it as an
1139            argument to speed up the process."""
1140         sub_format = self._downloader.params.get('subtitlesformat')
1141         self.to_screen(u'%s: Looking for automatic captions' % video_id)
1142         mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
1143         err_msg = u'Couldn\'t find automatic captions for %s' % video_id
1144         if mobj is None:
1145             self._downloader.report_warning(err_msg)
1146             return {}
1147         player_config = json.loads(mobj.group(1))
1148         try:
1149             args = player_config[u'args']
1150             caption_url = args[u'ttsurl']
1151             timestamp = args[u'timestamp']
1152             # We get the available subtitles
1153             list_params = compat_urllib_parse.urlencode({
1154                 'type': 'list',
1155                 'tlangs': 1,
1156                 'asrs': 1,
1157             })
1158             list_url = caption_url + '&' + list_params
1159             list_page = self._download_webpage(list_url, video_id)
1160             caption_list = xml.etree.ElementTree.fromstring(list_page.encode('utf-8'))
1161             original_lang_node = caption_list.find('track')
1162             if original_lang_node.attrib.get('kind') != 'asr' :
1163                 self._downloader.report_warning(u'Video doesn\'t have automatic captions')
1164                 return {}
1165             original_lang = original_lang_node.attrib['lang_code']
1166
1167             sub_lang_list = {}
1168             for lang_node in caption_list.findall('target'):
1169                 sub_lang = lang_node.attrib['lang_code']
1170                 params = compat_urllib_parse.urlencode({
1171                     'lang': original_lang,
1172                     'tlang': sub_lang,
1173                     'fmt': sub_format,
1174                     'ts': timestamp,
1175                     'kind': 'asr',
1176                 })
1177                 sub_lang_list[sub_lang] = caption_url + '&' + params
1178             return sub_lang_list
1179         # An extractor error can be raise by the download process if there are
1180         # no automatic captions but there are subtitles
1181         except (KeyError, ExtractorError):
1182             self._downloader.report_warning(err_msg)
1183             return {}
1184
1185     def _print_formats(self, formats):
1186         print('Available formats:')
1187         for x in formats:
1188             print('%s\t:\t%s\t[%s]%s' %(x, self._video_extensions.get(x, 'flv'),
1189                                         self._video_dimensions.get(x, '???'),
1190                                         ' ('+self._special_itags[x]+')' if x in self._special_itags else ''))
1191
1192     def _extract_id(self, url):
1193         mobj = re.match(self._VALID_URL, url, re.VERBOSE)
1194         if mobj is None:
1195             raise ExtractorError(u'Invalid URL: %s' % url)
1196         video_id = mobj.group(2)
1197         return video_id
1198
    def _get_video_url_list(self, url_map):
        """
        Transform a dictionary in the format {itag:url} to a list of (itag, url)
        with the requested formats.

        Returns None (after printing) when the 'listformats' option is set.
        Raises ExtractorError when no known or no requested format is
        available.
        """
        req_format = self._downloader.params.get('format', None)
        format_limit = self._downloader.params.get('format_limit', None)
        # Candidate itags, best quality first; the "prefer free formats"
        # option selects an alternative ordering.
        available_formats = self._available_formats_prefer_free if self._downloader.params.get('prefer_free_formats', False) else self._available_formats
        if format_limit is not None and format_limit in available_formats:
            # Drop everything better than the requested quality cap.
            format_list = available_formats[available_formats.index(format_limit):]
        else:
            format_list = available_formats
        # Keep only the candidates the video actually offers (order preserved).
        existing_formats = [x for x in format_list if x in url_map]
        if len(existing_formats) == 0:
            raise ExtractorError(u'no known formats available for video')
        if self._downloader.params.get('listformats', None):
            self._print_formats(existing_formats)
            return
        if req_format is None or req_format == 'best':
            video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
        elif req_format == 'worst':
            video_url_list = [(existing_formats[-1], url_map[existing_formats[-1]])] # worst quality
        elif req_format in ('-1', 'all'):
            video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
        else:
            # Specific formats. We pick the first in a slash-delimeted sequence.
            # Format can be specified as itag or 'mp4' or 'flv' etc. We pick the highest quality
            # available in the specified format. For example,
            # if '1/2/3/4' is requested and '2' and '4' are available, we pick '2'.
            # if '1/mp4/3/4' is requested and '1' and '5' (is a mp4) are available, we pick '1'.
            # if '1/mp4/3/4' is requested and '4' and '5' (is a mp4) are available, we pick '5'.
            req_formats = req_format.split('/')
            video_url_list = None
            for rf in req_formats:
                # Exact itag match wins immediately.
                if rf in url_map:
                    video_url_list = [(rf, url_map[rf])]
                    break
                # Otherwise treat rf as a container name (e.g. 'mp4') and try
                # its member itags in quality order.
                if rf in self._video_formats_map:
                    for srf in self._video_formats_map[rf]:
                        if srf in url_map:
                            video_url_list = [(srf, url_map[srf])]
                            break
                    else:
                        # No member itag available: fall through to the next
                        # slash-delimited alternative (for-else runs only when
                        # the inner loop did not break).
                        continue
                    # The inner loop broke, i.e. a format was found: stop.
                    break
            if video_url_list is None:
                raise ExtractorError(u'requested format not available')
        return video_url_list
1247
1248     def _extract_from_m3u8(self, manifest_url, video_id):
1249         url_map = {}
1250         def _get_urls(_manifest):
1251             lines = _manifest.split('\n')
1252             urls = filter(lambda l: l and not l.startswith('#'),
1253                             lines)
1254             return urls
1255         manifest = self._download_webpage(manifest_url, video_id, u'Downloading formats manifest')
1256         formats_urls = _get_urls(manifest)
1257         for format_url in formats_urls:
1258             itag = self._search_regex(r'itag/(\d+?)/', format_url, 'itag')
1259             url_map[itag] = format_url
1260         return url_map
1261
1262     def _real_extract(self, url):
1263         if re.match(r'(?:https?://)?[^/]+/watch\?feature=[a-z_]+$', url):
1264             self._downloader.report_warning(u'Did you forget to quote the URL? Remember that & is a meta-character in most shells, so you want to put the URL in quotes, like  youtube-dl \'http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc\' (or simply  youtube-dl BaW_jenozKc  ).')
1265
1266         # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1267         mobj = re.search(self._NEXT_URL_RE, url)
1268         if mobj:
1269             url = 'https://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/')
1270         video_id = self._extract_id(url)
1271
1272         # Get video webpage
1273         self.report_video_webpage_download(video_id)
1274         url = 'https://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
1275         request = compat_urllib_request.Request(url)
1276         try:
1277             video_webpage_bytes = compat_urllib_request.urlopen(request).read()
1278         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1279             raise ExtractorError(u'Unable to download video webpage: %s' % compat_str(err))
1280
1281         video_webpage = video_webpage_bytes.decode('utf-8', 'ignore')
1282
1283         # Attempt to extract SWF player URL
1284         mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
1285         if mobj is not None:
1286             player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1287         else:
1288             player_url = None
1289
1290         # Get video info
1291         self.report_video_info_webpage_download(video_id)
1292         if re.search(r'player-age-gate-content">', video_webpage) is not None:
1293             self.report_age_confirmation()
1294             age_gate = True
1295             # We simulate the access to the video from www.youtube.com/v/{video_id}
1296             # this can be viewed without login into Youtube
1297             data = compat_urllib_parse.urlencode({'video_id': video_id,
1298                                                   'el': 'embedded',
1299                                                   'gl': 'US',
1300                                                   'hl': 'en',
1301                                                   'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1302                                                   'asv': 3,
1303                                                   'sts':'1588',
1304                                                   })
1305             video_info_url = 'https://www.youtube.com/get_video_info?' + data
1306             video_info_webpage = self._download_webpage(video_info_url, video_id,
1307                                     note=False,
1308                                     errnote='unable to download video info webpage')
1309             video_info = compat_parse_qs(video_info_webpage)
1310         else:
1311             age_gate = False
1312             for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
1313                 video_info_url = ('https://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
1314                         % (video_id, el_type))
1315                 video_info_webpage = self._download_webpage(video_info_url, video_id,
1316                                         note=False,
1317                                         errnote='unable to download video info webpage')
1318                 video_info = compat_parse_qs(video_info_webpage)
1319                 if 'token' in video_info:
1320                     break
1321         if 'token' not in video_info:
1322             if 'reason' in video_info:
1323                 raise ExtractorError(u'YouTube said: %s' % video_info['reason'][0], expected=True)
1324             else:
1325                 raise ExtractorError(u'"token" parameter not in video info for unknown reason')
1326
1327         # Check for "rental" videos
1328         if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
1329             raise ExtractorError(u'"rental" videos not supported')
1330
1331         # Start extracting information
1332         self.report_information_extraction(video_id)
1333
1334         # uploader
1335         if 'author' not in video_info:
1336             raise ExtractorError(u'Unable to extract uploader name')
1337         video_uploader = compat_urllib_parse.unquote_plus(video_info['author'][0])
1338
1339         # uploader_id
1340         video_uploader_id = None
1341         mobj = re.search(r'<link itemprop="url" href="http://www.youtube.com/(?:user|channel)/([^"]+)">', video_webpage)
1342         if mobj is not None:
1343             video_uploader_id = mobj.group(1)
1344         else:
1345             self._downloader.report_warning(u'unable to extract uploader nickname')
1346
1347         # title
1348         if 'title' not in video_info:
1349             raise ExtractorError(u'Unable to extract video title')
1350         video_title = compat_urllib_parse.unquote_plus(video_info['title'][0])
1351
1352         # thumbnail image
1353         # We try first to get a high quality image:
1354         m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
1355                             video_webpage, re.DOTALL)
1356         if m_thumb is not None:
1357             video_thumbnail = m_thumb.group(1)
1358         elif 'thumbnail_url' not in video_info:
1359             self._downloader.report_warning(u'unable to extract video thumbnail')
1360             video_thumbnail = ''
1361         else:   # don't panic if we can't find it
1362             video_thumbnail = compat_urllib_parse.unquote_plus(video_info['thumbnail_url'][0])
1363
1364         # upload date
1365         upload_date = None
1366         mobj = re.search(r'id="eow-date.*?>(.*?)</span>', video_webpage, re.DOTALL)
1367         if mobj is not None:
1368             upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
1369             upload_date = unified_strdate(upload_date)
1370
1371         # description
1372         video_description = get_element_by_id("eow-description", video_webpage)
1373         if video_description:
1374             video_description = clean_html(video_description)
1375         else:
1376             fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
1377             if fd_mobj:
1378                 video_description = unescapeHTML(fd_mobj.group(1))
1379             else:
1380                 video_description = u''
1381
1382         # subtitles
1383         video_subtitles = self.extract_subtitles(video_id, video_webpage)
1384
1385         if self._downloader.params.get('listsubtitles', False):
1386             self._list_available_subtitles(video_id, video_webpage)
1387             return
1388
1389         if 'length_seconds' not in video_info:
1390             self._downloader.report_warning(u'unable to extract video duration')
1391             video_duration = ''
1392         else:
1393             video_duration = compat_urllib_parse.unquote_plus(video_info['length_seconds'][0])
1394
1395         # Decide which formats to download
1396
1397         try:
1398             mobj = re.search(r';ytplayer.config = ({.*?});', video_webpage)
1399             if not mobj:
1400                 raise ValueError('Could not find vevo ID')
1401             info = json.loads(mobj.group(1))
1402             args = info['args']
1403             # Easy way to know if the 's' value is in url_encoded_fmt_stream_map
1404             # this signatures are encrypted
1405             m_s = re.search(r'[&,]s=', args['url_encoded_fmt_stream_map'])
1406             if m_s is not None:
1407                 self.to_screen(u'%s: Encrypted signatures detected.' % video_id)
1408                 video_info['url_encoded_fmt_stream_map'] = [args['url_encoded_fmt_stream_map']]
1409             m_s = re.search(r'[&,]s=', args.get('adaptive_fmts', u''))
1410             if m_s is not None:
1411                 if 'url_encoded_fmt_stream_map' in video_info:
1412                     video_info['url_encoded_fmt_stream_map'][0] += ',' + args['adaptive_fmts']
1413                 else:
1414                     video_info['url_encoded_fmt_stream_map'] = [args['adaptive_fmts']]
1415             elif 'adaptive_fmts' in video_info:
1416                 if 'url_encoded_fmt_stream_map' in video_info:
1417                     video_info['url_encoded_fmt_stream_map'][0] += ',' + video_info['adaptive_fmts'][0]
1418                 else:
1419                     video_info['url_encoded_fmt_stream_map'] = video_info['adaptive_fmts']
1420         except ValueError:
1421             pass
1422
1423         if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1424             self.report_rtmp_download()
1425             video_url_list = [(None, video_info['conn'][0])]
1426         elif 'url_encoded_fmt_stream_map' in video_info and len(video_info['url_encoded_fmt_stream_map']) >= 1:
1427             if 'rtmpe%3Dyes' in video_info['url_encoded_fmt_stream_map'][0]:
1428                 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
1429             url_map = {}
1430             for url_data_str in video_info['url_encoded_fmt_stream_map'][0].split(','):
1431                 url_data = compat_parse_qs(url_data_str)
1432                 if 'itag' in url_data and 'url' in url_data:
1433                     url = url_data['url'][0]
1434                     if 'sig' in url_data:
1435                         url += '&signature=' + url_data['sig'][0]
1436                     elif 's' in url_data:
1437                         encrypted_sig = url_data['s'][0]
1438                         if self._downloader.params.get('verbose'):
1439                             if age_gate:
1440                                 player_version = self._search_regex(
1441                                     r'-(.+)\.swf$',
1442                                     player_url if player_url else None,
1443                                     'flash player', fatal=False)
1444                                 player_desc = 'flash player %s' % player_version
1445                             else:
1446                                 player_version = self._search_regex(
1447                                     r'html5player-(.+?)\.js', video_webpage,
1448                                     'html5 player', fatal=False)
1449                                 player_desc = u'html5 player %s' % player_version
1450
1451                             parts_sizes = u'.'.join(compat_str(len(part)) for part in encrypted_sig.split('.'))
1452                             self.to_screen(u'encrypted signature length %d (%s), itag %s, %s' %
1453                                 (len(encrypted_sig), parts_sizes, url_data['itag'][0], player_desc))
1454
1455                         if not age_gate:
1456                             jsplayer_url_json = self._search_regex(
1457                                 r'"assets":.+?"js":\s*("[^"]+")',
1458                                 video_webpage, u'JS player URL')
1459                             player_url = json.loads(jsplayer_url_json)
1460
1461                         signature = self._decrypt_signature(
1462                             encrypted_sig, video_id, player_url, age_gate)
1463                         url += '&signature=' + signature
1464                     if 'ratebypass' not in url:
1465                         url += '&ratebypass=yes'
1466                     url_map[url_data['itag'][0]] = url
1467             video_url_list = self._get_video_url_list(url_map)
1468             if not video_url_list:
1469                 return
1470         elif video_info.get('hlsvp'):
1471             manifest_url = video_info['hlsvp'][0]
1472             url_map = self._extract_from_m3u8(manifest_url, video_id)
1473             video_url_list = self._get_video_url_list(url_map)
1474             if not video_url_list:
1475                 return
1476
1477         else:
1478             raise ExtractorError(u'no conn or url_encoded_fmt_stream_map information found in video info')
1479
1480         results = []
1481         for format_param, video_real_url in video_url_list:
1482             # Extension
1483             video_extension = self._video_extensions.get(format_param, 'flv')
1484
1485             video_format = '{0} - {1}{2}'.format(format_param if format_param else video_extension,
1486                                               self._video_dimensions.get(format_param, '???'),
1487                                               ' ('+self._special_itags[format_param]+')' if format_param in self._special_itags else '')
1488
1489             results.append({
1490                 'id':       video_id,
1491                 'url':      video_real_url,
1492                 'uploader': video_uploader,
1493                 'uploader_id': video_uploader_id,
1494                 'upload_date':  upload_date,
1495                 'title':    video_title,
1496                 'ext':      video_extension,
1497                 'format':   video_format,
1498                 'thumbnail':    video_thumbnail,
1499                 'description':  video_description,
1500                 'player_url':   player_url,
1501                 'subtitles':    video_subtitles,
1502                 'duration':     video_duration
1503             })
1504         return results
1505
class YoutubePlaylistIE(InfoExtractor):
    """Extract all videos of a YouTube playlist via the GData API."""
    IE_DESC = u'YouTube.com playlists'
    _VALID_URL = r"""(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        youtube\.com/
                        (?:
                           (?:course|view_play_list|my_playlists|artist|playlist|watch)
                           \? (?:.*?&)*? (?:p|a|list)=
                        |  p/
                        )
                        ((?:PL|EC|UU|FL)?[0-9A-Za-z-_]{10,})
                        .*
                     |
                        ((?:PL|EC|UU|FL)[0-9A-Za-z-_]{10,})
                     )"""
    _TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/playlists/%s?max-results=%i&start-index=%i&v=2&alt=json&safeSearch=none'
    _MAX_RESULTS = 50
    IE_NAME = u'youtube:playlist'

    @classmethod
    def suitable(cls, url):
        """Receives a URL and returns True if suitable for this IE."""
        # _VALID_URL is written in verbose mode, so the default matcher
        # cannot be used here.
        return re.match(cls._VALID_URL, url, re.VERBOSE) is not None

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url, re.VERBOSE)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        # Either alternative of _VALID_URL captures the playlist id.
        playlist_id = mobj.group(1) or mobj.group(2)

        # Page through the GData API collecting (position, watch-URL) pairs.
        indexed_videos = []
        for page_num in itertools.count(1):
            start_index = self._MAX_RESULTS * (page_num - 1) + 1
            if start_index >= 1000:
                # The API does not serve results past index 1000.
                self._downloader.report_warning(u'Max number of results reached')
                break
            url = self._TEMPLATE_URL % (playlist_id, self._MAX_RESULTS, start_index)
            page = self._download_webpage(url, playlist_id, u'Downloading page #%s' % page_num)

            try:
                response = json.loads(page)
            except ValueError as err:
                raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))

            if 'feed' not in response:
                raise ExtractorError(u'Got a malformed response from YouTube API')
            playlist_title = response['feed']['title']['$t']
            if 'entry' not in response['feed']:
                # An entry-less page means the video count was an exact
                # multiple of self._MAX_RESULTS; we are done.
                break

            for entry in response['feed']['entry']:
                position = entry['yt$position']['$t']
                media = entry.get('media$group')
                if media and 'yt$videoid' in media:
                    indexed_videos.append((
                        position,
                        'https://www.youtube.com/watch?v=' + media['yt$videoid']['$t']
                    ))

        # Restore playlist order, then drop the position component.
        video_urls = [vurl for (_, vurl) in sorted(indexed_videos)]

        url_results = [self.url_result(vurl, 'Youtube') for vurl in video_urls]
        return [self.playlist_result(url_results, playlist_id, playlist_title)]
1573
1574
class YoutubeChannelIE(InfoExtractor):
    """Extract all videos listed on a YouTube channel page."""
    IE_DESC = u'YouTube.com channels'
    _VALID_URL = r"^(?:https?://)?(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/([0-9A-Za-z_-]+)"
    _TEMPLATE_URL = 'http://www.youtube.com/channel/%s/videos?sort=da&flow=list&view=0&page=%s&gl=US&hl=en'
    _MORE_PAGES_INDICATOR = 'yt-uix-load-more'
    _MORE_PAGES_URL = 'http://www.youtube.com/c4_browse_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s'
    IE_NAME = u'youtube:channel'

    def extract_videos_from_page(self, page):
        """Return the video ids linked from *page*, in order, de-duplicated."""
        found = []
        for match in re.finditer(r'href="/watch\?v=([0-9A-Za-z_-]+)&?', page):
            candidate = match.group(1)
            if candidate not in found:
                found.append(candidate)
        return found

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        channel_id = mobj.group(1)

        # The first page is plain HTML.
        pagenum = 1
        url = self._TEMPLATE_URL % (channel_id, pagenum)
        page = self._download_webpage(url, channel_id,
                                      u'Downloading page #%s' % pagenum)

        video_ids = self.extract_videos_from_page(page)

        # Any further pages come from the json-based channel_ajax query.
        if self._MORE_PAGES_INDICATOR in page:
            for pagenum in itertools.count(1):
                url = self._MORE_PAGES_URL % (pagenum, channel_id)
                page = self._download_webpage(url, channel_id,
                                              u'Downloading page #%s' % pagenum)
                page = json.loads(page)
                video_ids.extend(self.extract_videos_from_page(page['content_html']))
                if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']:
                    break

        self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))

        url_entries = [self.url_result('http://www.youtube.com/watch?v=%s' % video_id, 'Youtube')
                       for video_id in video_ids]
        return [self.playlist_result(url_entries, channel_id)]
1629
1630
class YoutubeUserIE(InfoExtractor):
    """Extract all uploads of a YouTube user through the GData API."""
    IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?)|ytuser:)(?!feed/)([A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
    _GDATA_PAGE_SIZE = 50
    _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'
    IE_NAME = u'youtube:user'

    @classmethod
    def suitable(cls, url):
        # Don't return True if the url can be extracted with another youtube
        # extractor: this regex is too permissive and would match those URLs
        # as well.  A generator expression is already an iterator, so no
        # iter() wrapper is needed.
        other_ies = (klass for (name, klass) in globals().items()
                     if name.endswith('IE') and klass is not cls)
        if any(ie.suitable(url) for ie in other_ies):
            return False
        return super(YoutubeUserIE, cls).suitable(url)

    def _real_extract(self, url):
        # Extract username
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        username = mobj.group(1)

        # Download video ids using YouTube Data API. Result size per
        # query is limited (currently to 50 videos) so we need to query
        # page by page until there are no video ids - it means we got
        # all of them.
        video_ids = []

        for pagenum in itertools.count(0):
            start_index = pagenum * self._GDATA_PAGE_SIZE + 1

            gdata_url = self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index)
            page = self._download_webpage(gdata_url, username,
                                          u'Downloading video ids from %d to %d' % (start_index, start_index + self._GDATA_PAGE_SIZE))

            try:
                response = json.loads(page)
            except ValueError as err:
                raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))
            if 'entry' not in response['feed']:
                # Number of videos is a multiple of self._GDATA_PAGE_SIZE
                break

            # Extract video identifiers (last path component of the entry id)
            ids_in_page = [entry['id']['$t'].split('/')[-1]
                           for entry in response['feed']['entry']]
            video_ids.extend(ids_in_page)

            # A little optimization - if the current page is not "full",
            # i.e. contains fewer than _GDATA_PAGE_SIZE video ids, it must
            # be the last one - there are no more ids on further pages -
            # no need to query again.
            if len(ids_in_page) < self._GDATA_PAGE_SIZE:
                break

        urls = ['http://www.youtube.com/watch?v=%s' % video_id for video_id in video_ids]
        url_results = [self.url_result(rurl, 'Youtube') for rurl in urls]
        return [self.playlist_result(url_results, playlist_title=username)]
1695
class YoutubeSearchIE(SearchInfoExtractor):
    """Search YouTube through the GData API ("ytsearch" keyword)."""
    IE_DESC = u'YouTube.com searches'
    _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc'
    _MAX_RESULTS = 1000
    IE_NAME = u'youtube:search'
    _SEARCH_KEY = 'ytsearch'

    def report_download_page(self, query, pagenum):
        """Report attempt to download search page with given number."""
        self._downloader.to_screen(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""

        video_ids = []
        pagenum = 0
        limit = n

        # 50 results per API page; stop once enough ids were collected or
        # the API reports there is nothing more.
        while (50 * pagenum) < limit:
            self.report_download_page(query, pagenum + 1)
            result_url = self._API_URL % (compat_urllib_parse.quote_plus(query), (50 * pagenum) + 1)
            request = compat_urllib_request.Request(result_url)
            try:
                data = compat_urllib_request.urlopen(request).read().decode('utf-8')
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                raise ExtractorError(u'Unable to download API page: %s' % compat_str(err))
            api_response = json.loads(data)['data']

            if 'items' not in api_response:
                raise ExtractorError(u'[youtube] No video results')

            video_ids += [video['id'] for video in api_response['items']]

            # Never request more results than actually exist.
            limit = min(n, api_response['totalItems'])
            pagenum += 1

        if len(video_ids) > n:
            video_ids = video_ids[:n]
        videos = [self.url_result('http://www.youtube.com/watch?v=%s' % video_id, 'Youtube')
                  for video_id in video_ids]
        return self.playlist_result(videos, query)
1737
1738
class YoutubeShowIE(InfoExtractor):
    """Extract every season playlist of a (multi-season) YouTube show."""
    IE_DESC = u'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://www\.youtube\.com/show/(.*)'
    IE_NAME = u'youtube:show'

    def _real_extract(self, url):
        show_name = re.match(self._VALID_URL, url).group(1)
        webpage = self._download_webpage(url, show_name, u'Downloading show webpage')
        # There's one playlist for each season of the show
        m_seasons = list(re.finditer(r'href="(/playlist\?list=.*?)"', webpage))
        self.to_screen(u'%s: Found %s seasons' % (show_name, len(m_seasons)))
        season_urls = ('https://www.youtube.com' + season.group(1) for season in m_seasons)
        return [self.url_result(surl, 'YoutubePlaylist') for surl in season_urls]
1752
1753
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Base class for extractors that fetch info from
    http://www.youtube.com/feed_ajax
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
    """
    _LOGIN_REQUIRED = True
    _PAGING_STEP = 30
    # use action_load_personal_feed instead of action_load_system_feed
    _PERSONAL_FEED = False

    @property
    def _FEED_TEMPLATE(self):
        """URL template for the feed_ajax endpoint; '%s' paging slot left open."""
        if self._PERSONAL_FEED:
            action = 'action_load_personal_feed'
        else:
            action = 'action_load_system_feed'
        return 'http://www.youtube.com/feed_ajax?%s=1&feed_name=%s&paging=%%s' % (action, self._FEED_NAME)

    @property
    def IE_NAME(self):
        return u'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        self._login()

    def _real_extract(self, url):
        feed_entries = []
        # itertools.count's step argument is only available in 2.7 or
        # higher, so compute the paging offset by hand.
        for page_idx in itertools.count(0):
            paging = page_idx * self._PAGING_STEP
            info = self._download_webpage(self._FEED_TEMPLATE % paging,
                                          u'%s feed' % self._FEED_NAME,
                                          u'Downloading page %s' % page_idx)
            info = json.loads(info)
            matches = re.finditer(r'"/watch\?v=(.*?)["&]', info['feed_html'])
            for video_id in orderedSet(m.group(1) for m in matches):
                feed_entries.append(self.url_result(video_id, 'Youtube'))
            if info['paging'] is None:
                break
        return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE)
1795
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Extractor for the logged-in user's subscriptions feed."""
    IE_DESC = u'YouTube.com subscriptions feed, "ytsubs" keyword(requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
    # Feed name interpolated into the feed_ajax URL by the base class.
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = u'Youtube Subscriptions'
1801
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Extractor for the logged-in user's recommended-videos feed."""
    IE_DESC = u'YouTube.com recommended videos, "ytrec" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?'
    # Feed name interpolated into the feed_ajax URL by the base class.
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = u'Youtube Recommended videos'
1807
class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor):
    """Extractor for the logged-in user's "Watch Later" list."""
    IE_DESC = u'Youtube watch later list, "ytwatchlater" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/watch_later|:ytwatchlater'
    # Feed name interpolated into the feed_ajax URL by the base class.
    _FEED_NAME = 'watch_later'
    _PLAYLIST_TITLE = u'Youtube Watch Later'
    # Larger page size than the base class default of 30.
    _PAGING_STEP = 100
    # The watch-later list is per-user: use the personal-feed action.
    _PERSONAL_FEED = True
1815
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Extractor for the logged-in user's favourite videos."""
    IE_NAME = u'youtube:favorites'
    IE_DESC = u'YouTube.com favourite videos, "ytfav" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
    _LOGIN_REQUIRED = True

    def _real_extract(self, url):
        # The favourites page embeds an ordinary playlist id; pull it out
        # and hand the work off to the playlist extractor.
        webpage = self._download_webpage('https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        fav_list_id = self._search_regex(r'list=(.+?)["&]', webpage, u'favourites playlist id')
        return self.url_result(fav_list_id, 'YoutubePlaylist')