Merge branch 'automatic-signatures'
[youtube-dl] / youtube_dl / extractor / youtube.py
1 # coding: utf-8
2
3 import collections
4 import errno
5 import io
6 import itertools
7 import json
8 import os.path
9 import re
10 import socket
11 import string
12 import struct
13 import traceback
14 import xml.etree.ElementTree
15 import zlib
16
17 from .common import InfoExtractor, SearchInfoExtractor
18 from .subtitles import SubtitlesInfoExtractor
19 from ..utils import (
20     compat_chr,
21     compat_http_client,
22     compat_parse_qs,
23     compat_urllib_error,
24     compat_urllib_parse,
25     compat_urllib_request,
26     compat_str,
27
28     clean_html,
29     get_element_by_id,
30     ExtractorError,
31     unescapeHTML,
32     unified_strdate,
33     orderedSet,
34     write_json_file,
35 )
36
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _LANG_URL = r'https://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
    _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    def report_lang(self):
        """Report attempt to set language."""
        self.to_screen(u'Setting language')

    def _set_language(self):
        """Switch the site language to English so pages parse predictably.

        Returns True on success, False (after a warning) on network errors.
        """
        request = compat_urllib_request.Request(self._LANG_URL)
        try:
            self.report_lang()
            compat_urllib_request.urlopen(request).read()
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self._downloader.report_warning(u'unable to set language: %s' % compat_str(err))
            return False
        return True

    def _login(self):
        """Log in to YouTube with the credentials from _get_login_info().

        Returns True on success, False otherwise.  Raises ExtractorError
        if no credentials are available while _LOGIN_REQUIRED is set.
        """
        (username, password) = self._get_login_info()
        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED:
                raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            return False

        request = compat_urllib_request.Request(self._LOGIN_URL)
        try:
            login_page = compat_urllib_request.urlopen(request).read().decode('utf-8')
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self._downloader.report_warning(u'unable to fetch login page: %s' % compat_str(err))
            return False

        # Hidden anti-forgery tokens embedded in the login form; they must
        # be echoed back with the credentials.  Missing tokens are sent as
        # None (best effort).
        galx = None
        dsh = None
        match = re.search(r'<input.+?name="GALX".+?value="(.+?)"', login_page, re.DOTALL)
        if match:
            galx = match.group(1)
        match = re.search(r'<input.+?name="dsh".+?value="(.+?)"', login_page, re.DOTALL)
        if match:
            dsh = match.group(1)

        # Log in
        login_form_strs = {
                u'continue': u'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
                u'Email': username,
                u'GALX': galx,
                u'Passwd': password,
                u'PersistentCookie': u'yes',
                u'_utf8': u'霱',
                u'bgresponse': u'js_disabled',
                u'checkConnection': u'',
                u'checkedDomains': u'youtube',
                u'dnConn': u'',
                u'dsh': dsh,
                u'pstMsg': u'0',
                u'rmShown': u'1',
                u'secTok': u'',
                u'signIn': u'Sign in',
                u'timeStmp': u'',
                u'service': u'youtube',
                u'uilel': u'3',
                u'hl': u'en_US',
        }
        # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
        # chokes on unicode
        login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k, v in login_form_strs.items())
        login_data = compat_urllib_parse.urlencode(login_form).encode('ascii')
        request = compat_urllib_request.Request(self._LOGIN_URL, login_data)
        try:
            self.report_login()
            login_results = compat_urllib_request.urlopen(request).read().decode('utf-8')
            # Being served the login form again means the credentials were
            # rejected.
            if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
                self._downloader.report_warning(u'unable to log in: bad username or password')
                return False
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self._downloader.report_warning(u'unable to log in: %s' % compat_str(err))
            return False
        return True

    def _confirm_age(self):
        """Confirm YouTube's age gate; raises ExtractorError on failure."""
        age_form = {
                'next_url':     '/',
                'action_confirm':   'Confirm',
                }
        # Encode the POST body to bytes, matching the login request above
        # (required on Python 3; a no-op for this ASCII data on Python 2).
        request = compat_urllib_request.Request(self._AGE_URL, compat_urllib_parse.urlencode(age_form).encode('ascii'))
        try:
            self.report_age_confirmation()
            compat_urllib_request.urlopen(request).read().decode('utf-8')
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            raise ExtractorError(u'Unable to confirm age: %s' % compat_str(err))
        return True

    def _real_initialize(self):
        """Set language, log in and pass the age gate before extraction."""
        if self._downloader is None:
            return
        if not self._set_language():
            return
        if not self._login():
            return
        self._confirm_age()
143
144
class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
    """Information extractor for youtube.com videos."""
    IE_DESC = u'YouTube.com'
    _VALID_URL = r"""^
                     (
                         (?:https?://)?                                       # http(s):// (optional)
                         (?:(?:(?:(?:\w+\.)?youtube(?:-nocookie)?\.com/|
                            tube\.majestyc\.net/|
                            youtube\.googleapis\.com/)                        # the various hostnames, with wildcard subdomains
                         (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
                         (?:                                                  # the various things that can precede the ID:
                             (?:(?:v|embed|e)/)                               # v/ or embed/ or e/
                             |(?:                                             # or the v= param in all its forms
                                 (?:(?:watch|movie)(?:_popup)?(?:\.php)?)?    # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
                                 (?:\?|\#!?)                                  # the params delimiter ? or # or #!
                                 (?:.*?&)?                                    # any other preceding param (like /?s=tuff&v=xxxx)
                                 v=
                             )
                         ))
                         |youtu\.be/                                          # just youtu.be/xxxx
                         )
                     )?                                                       # all until now is optional -> you can pass the naked ID
                     ([0-9A-Za-z_-]{11})                                      # here is it! the YouTube video ID
                     (?(1).+)?                                                # if we found the ID, everything can follow
                     $"""
    # Matches the next_url query parameter (used in redirect URLs).
    _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
    # Listed in order of quality
    _available_formats = ['38', '37', '46', '22', '45', '35', '44', '34', '18', '43', '6', '5', '36', '17', '13',
                          # Apple HTTP Live Streaming
                          '96', '95', '94', '93', '92', '132', '151',
                          # 3D
                          '85', '84', '102', '83', '101', '82', '100',
                          # Dash video
                          '138', '137', '248', '136', '247', '135', '246',
                          '245', '244', '134', '243', '133', '242', '160',
                          # Dash audio
                          '141', '172', '140', '171', '139',
                          ]
    # Same itags, but with free (webm) formats ranked ahead of non-free ones.
    _available_formats_prefer_free = ['38', '46', '37', '45', '22', '44', '35', '43', '34', '18', '6', '5', '36', '17', '13',
                                      # Apple HTTP Live Streaming
                                      '96', '95', '94', '93', '92', '132', '151',
                                      # 3D
                                      '85', '102', '84', '101', '83', '100', '82',
                                      # Dash video
                                      '138', '248', '137', '247', '136', '246', '245',
                                      '244', '135', '243', '134', '242', '133', '160',
                                      # Dash audio
                                      '172', '141', '171', '140', '139',
                                      ]
    # Container/extension -> the itags that use that container.
    _video_formats_map = {
        'flv': ['35', '34', '6', '5'],
        '3gp': ['36', '17', '13'],
        'mp4': ['38', '37', '22', '18'],
        'webm': ['46', '45', '44', '43'],
    }
    # itag -> file extension/container.
    _video_extensions = {
        '13': '3gp',
        '17': '3gp',
        '18': 'mp4',
        '22': 'mp4',
        '36': '3gp',
        '37': 'mp4',
        '38': 'mp4',
        '43': 'webm',
        '44': 'webm',
        '45': 'webm',
        '46': 'webm',

        # 3d videos
        '82': 'mp4',
        '83': 'mp4',
        '84': 'mp4',
        '85': 'mp4',
        '100': 'webm',
        '101': 'webm',
        '102': 'webm',

        # Apple HTTP Live Streaming
        '92': 'mp4',
        '93': 'mp4',
        '94': 'mp4',
        '95': 'mp4',
        '96': 'mp4',
        '132': 'mp4',
        '151': 'mp4',

        # Dash mp4
        '133': 'mp4',
        '134': 'mp4',
        '135': 'mp4',
        '136': 'mp4',
        '137': 'mp4',
        '138': 'mp4',
        '139': 'mp4',
        '140': 'mp4',
        '141': 'mp4',
        '160': 'mp4',

        # Dash webm
        '171': 'webm',
        '172': 'webm',
        '242': 'webm',
        '243': 'webm',
        '244': 'webm',
        '245': 'webm',
        '246': 'webm',
        '247': 'webm',
        '248': 'webm',
    }
    # itag -> video dimensions (height x width), quality label, or audio
    # bitrate for audio-only itags.
    _video_dimensions = {
        '5': '240x400',
        '6': '???',
        '13': '???',
        '17': '144x176',
        '18': '360x640',
        '22': '720x1280',
        '34': '360x640',
        '35': '480x854',
        '36': '240x320',
        '37': '1080x1920',
        '38': '3072x4096',
        '43': '360x640',
        '44': '480x854',
        '45': '720x1280',
        '46': '1080x1920',
        '82': '360p',
        '83': '480p',
        '84': '720p',
        '85': '1080p',
        '92': '240p',
        '93': '360p',
        '94': '480p',
        '95': '720p',
        '96': '1080p',
        '100': '360p',
        '101': '480p',
        '102': '720p',
        '132': '240p',
        '151': '72p',
        '133': '240p',
        '134': '360p',
        '135': '480p',
        '136': '720p',
        '137': '1080p',
        '138': '>1080p',
        '139': '48k',
        '140': '128k',
        '141': '256k',
        '160': '192p',
        '171': '128k',
        '172': '256k',
        '242': '240p',
        '243': '360p',
        '244': '480p',
        '245': '480p',
        '246': '480p',
        '247': '720p',
        '248': '1080p',
    }
    # itags with special stream types (3D / DASH audio / DASH video).
    _special_itags = {
        '82': '3D',
        '83': '3D',
        '84': '3D',
        '85': '3D',
        '100': '3D',
        '101': '3D',
        '102': '3D',
        '133': 'DASH Video',
        '134': 'DASH Video',
        '135': 'DASH Video',
        '136': 'DASH Video',
        '137': 'DASH Video',
        '138': 'DASH Video',
        '139': 'DASH Audio',
        '140': 'DASH Audio',
        '141': 'DASH Audio',
        '160': 'DASH Video',
        '171': 'DASH Audio',
        '172': 'DASH Audio',
        '242': 'DASH Video',
        '243': 'DASH Video',
        '244': 'DASH Video',
        '245': 'DASH Video',
        '246': 'DASH Video',
        '247': 'DASH Video',
        '248': 'DASH Video',
    }

    IE_NAME = u'youtube'
    # Test cases: URL plus the metadata expected after extraction.
    _TESTS = [
        {
            u"url":  u"http://www.youtube.com/watch?v=BaW_jenozKc",
            u"file":  u"BaW_jenozKc.mp4",
            u"info_dict": {
                u"title": u"youtube-dl test video \"'/\\ä↭𝕐",
                u"uploader": u"Philipp Hagemeister",
                u"uploader_id": u"phihag",
                u"upload_date": u"20121002",
                u"description": u"test chars:  \"'/\\ä↭𝕐\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de ."
            }
        },
        {
            u"url":  u"http://www.youtube.com/watch?v=1ltcDfZMA3U",
            u"file":  u"1ltcDfZMA3U.flv",
            u"note": u"Test VEVO video (#897)",
            u"info_dict": {
                u"upload_date": u"20070518",
                u"title": u"Maps - It Will Find You",
                u"description": u"Music video by Maps performing It Will Find You.",
                u"uploader": u"MuteUSA",
                u"uploader_id": u"MuteUSA"
            }
        },
        {
            u"url":  u"http://www.youtube.com/watch?v=UxxajLWwzqY",
            u"file":  u"UxxajLWwzqY.mp4",
            u"note": u"Test generic use_cipher_signature video (#897)",
            u"info_dict": {
                u"upload_date": u"20120506",
                u"title": u"Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]",
                u"description": u"md5:bdac09887d209a4ed54b8f76b2bdaa8b",
                u"uploader": u"Icona Pop",
                u"uploader_id": u"IconaPop"
            }
        },
        {
            u"url":  u"https://www.youtube.com/watch?v=07FYdnEawAQ",
            u"file":  u"07FYdnEawAQ.mp4",
            u"note": u"Test VEVO video with age protection (#956)",
            u"info_dict": {
                u"upload_date": u"20130703",
                u"title": u"Justin Timberlake - Tunnel Vision (Explicit)",
                u"description": u"md5:64249768eec3bc4276236606ea996373",
                u"uploader": u"justintimberlakeVEVO",
                u"uploader_id": u"justintimberlakeVEVO"
            }
        },
        {
            u'url': u'https://www.youtube.com/watch?v=TGi3HqYrWHE',
            u'file': u'TGi3HqYrWHE.mp4',
            u'note': u'm3u8 video',
            u'info_dict': {
                u'title': u'Triathlon - Men - London 2012 Olympic Games',
                u'description': u'- Men -  TR02 - Triathlon - 07 August 2012 - London 2012 Olympic Games',
                u'uploader': u'olympic',
                u'upload_date': u'20120807',
                u'uploader_id': u'olympic',
            },
            u'params': {
                u'skip_download': True,
            },
        },
    ]
397
398
399     @classmethod
400     def suitable(cls, url):
401         """Receives a URL and returns True if suitable for this IE."""
402         if YoutubePlaylistIE.suitable(url): return False
403         return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
404
    def __init__(self, *args, **kwargs):
        super(YoutubeIE, self).__init__(*args, **kwargs)
        # Per-instance cache, presumably of extracted signature functions
        # per player — populated elsewhere in this class; TODO confirm.
        self._player_cache = {}
408
409     def report_video_webpage_download(self, video_id):
410         """Report attempt to download video webpage."""
411         self.to_screen(u'%s: Downloading video webpage' % video_id)
412
413     def report_video_info_webpage_download(self, video_id):
414         """Report attempt to download video info webpage."""
415         self.to_screen(u'%s: Downloading video info webpage' % video_id)
416
417     def report_information_extraction(self, video_id):
418         """Report attempt to extract video information."""
419         self.to_screen(u'%s: Extracting video information' % video_id)
420
421     def report_unavailable_format(self, video_id, format):
422         """Report extracted video URL."""
423         self.to_screen(u'%s: Format %s not available' % (video_id, format))
424
425     def report_rtmp_download(self):
426         """Indicate the download will use the RTMP protocol."""
427         self.to_screen(u'RTMP download detected')
428
    def _extract_signature_function(self, video_id, player_url, slen):
        """Build a signature-decryption function for the given player.

        The function is derived from the player code (JS or SWF) referenced
        by player_url, specific to signatures of length slen.  The derived
        index permutation is cached on disk under the 'cachedir' option
        (caching is disabled when cachedir is None).
        """
        # The player URL ends in -<id>.<ext>; <ext> selects the parser.
        id_m = re.match(r'.*-(?P<id>[a-zA-Z0-9_-]+)\.(?P<ext>[a-z]+)$',
                        player_url)
        player_type = id_m.group('ext')
        player_id = id_m.group('id')

        # Read from filesystem cache
        func_id = '%s_%s_%d' % (player_type, player_id, slen)
        # func_id is used as a filename below, so it must not contain path
        # separators.
        assert os.path.basename(func_id) == func_id
        cache_dir = self._downloader.params.get('cachedir',
                                                u'~/.youtube-dl/cache')

        cache_enabled = cache_dir is not None
        if cache_enabled:
            cache_fn = os.path.join(os.path.expanduser(cache_dir),
                                    u'youtube-sigfuncs',
                                    func_id + '.json')
            try:
                with io.open(cache_fn, 'r', encoding='utf-8') as cachef:
                    cache_spec = json.load(cachef)
                # The cached spec is a list of indices into the scrambled
                # signature string.
                return lambda s: u''.join(s[i] for i in cache_spec)
            except IOError:
                pass  # No cache available

        if player_type == 'js':
            code = self._download_webpage(
                player_url, video_id,
                note=u'Downloading %s player %s' % (player_type, player_id),
                errnote=u'Download of %s failed' % player_url)
            res = self._parse_sig_js(code)
        elif player_type == 'swf':
            urlh = self._request_webpage(
                player_url, video_id,
                note=u'Downloading %s player %s' % (player_type, player_id),
                errnote=u'Download of %s failed' % player_url)
            code = urlh.read()
            res = self._parse_sig_swf(code)
        else:
            assert False, 'Invalid player type %r' % player_type

        if cache_enabled:
            try:
                # Probe the function with a known string to record the index
                # permutation it applies, then persist that spec as JSON.
                test_string = u''.join(map(compat_chr, range(slen)))
                cache_res = res(test_string)
                cache_spec = [ord(c) for c in cache_res]
                try:
                    os.makedirs(os.path.dirname(cache_fn))
                except OSError as ose:
                    if ose.errno != errno.EEXIST:
                        raise
                write_json_file(cache_spec, cache_fn)
            except Exception:
                # Caching is best-effort; never fail extraction over it.
                tb = traceback.format_exc()
                self._downloader.report_warning(
                    u'Writing cache to %r failed: %s' % (cache_fn, tb))

        return res
486
487     def _print_sig_code(self, func, slen):
488         def gen_sig_code(idxs):
489             def _genslice(start, end, step):
490                 starts = u'' if start == 0 else str(start)
491                 ends = (u':%d' % (end+step)) if end + step >= 0 else u':'
492                 steps = u'' if step == 1 else (u':%d' % step)
493                 return u's[%s%s%s]' % (starts, ends, steps)
494
495             step = None
496             start = '(Never used)'  # Quelch pyflakes warnings - start will be
497                                     # set as soon as step is set
498             for i, prev in zip(idxs[1:], idxs[:-1]):
499                 if step is not None:
500                     if i - prev == step:
501                         continue
502                     yield _genslice(start, prev, step)
503                     step = None
504                     continue
505                 if i - prev in [-1, 1]:
506                     step = i - prev
507                     start = prev
508                     continue
509                 else:
510                     yield u's[%d]' % prev
511             if step is None:
512                 yield u's[%d]' % i
513             else:
514                 yield _genslice(start, i, step)
515
516         test_string = u''.join(map(compat_chr, range(slen)))
517         cache_res = func(test_string)
518         cache_spec = [ord(c) for c in cache_res]
519         expr_code = u' + '.join(gen_sig_code(cache_spec))
520         code = u'if len(s) == %d:\n    return %s\n' % (slen, expr_code)
521         self.to_screen(u'Extracted signature function:\n' + code)
522
523     def _parse_sig_js(self, jscode):
524         funcname = self._search_regex(
525             r'signature=([a-zA-Z]+)', jscode,
526             u'Initial JS player signature function name')
527
528         functions = {}
529
530         def argidx(varname):
531             return string.lowercase.index(varname)
532
533         def interpret_statement(stmt, local_vars, allow_recursion=20):
534             if allow_recursion < 0:
535                 raise ExtractorError(u'Recursion limit reached')
536
537             if stmt.startswith(u'var '):
538                 stmt = stmt[len(u'var '):]
539             ass_m = re.match(r'^(?P<out>[a-z]+)(?:\[(?P<index>[^\]]+)\])?' +
540                              r'=(?P<expr>.*)$', stmt)
541             if ass_m:
542                 if ass_m.groupdict().get('index'):
543                     def assign(val):
544                         lvar = local_vars[ass_m.group('out')]
545                         idx = interpret_expression(ass_m.group('index'),
546                                                    local_vars, allow_recursion)
547                         assert isinstance(idx, int)
548                         lvar[idx] = val
549                         return val
550                     expr = ass_m.group('expr')
551                 else:
552                     def assign(val):
553                         local_vars[ass_m.group('out')] = val
554                         return val
555                     expr = ass_m.group('expr')
556             elif stmt.startswith(u'return '):
557                 assign = lambda v: v
558                 expr = stmt[len(u'return '):]
559             else:
560                 raise ExtractorError(
561                     u'Cannot determine left side of statement in %r' % stmt)
562
563             v = interpret_expression(expr, local_vars, allow_recursion)
564             return assign(v)
565
566         def interpret_expression(expr, local_vars, allow_recursion):
567             if expr.isdigit():
568                 return int(expr)
569
570             if expr.isalpha():
571                 return local_vars[expr]
572
573             m = re.match(r'^(?P<in>[a-z]+)\.(?P<member>.*)$', expr)
574             if m:
575                 member = m.group('member')
576                 val = local_vars[m.group('in')]
577                 if member == 'split("")':
578                     return list(val)
579                 if member == 'join("")':
580                     return u''.join(val)
581                 if member == 'length':
582                     return len(val)
583                 if member == 'reverse()':
584                     return val[::-1]
585                 slice_m = re.match(r'slice\((?P<idx>.*)\)', member)
586                 if slice_m:
587                     idx = interpret_expression(
588                         slice_m.group('idx'), local_vars, allow_recursion-1)
589                     return val[idx:]
590
591             m = re.match(
592                 r'^(?P<in>[a-z]+)\[(?P<idx>.+)\]$', expr)
593             if m:
594                 val = local_vars[m.group('in')]
595                 idx = interpret_expression(m.group('idx'), local_vars,
596                                            allow_recursion-1)
597                 return val[idx]
598
599             m = re.match(r'^(?P<a>.+?)(?P<op>[%])(?P<b>.+?)$', expr)
600             if m:
601                 a = interpret_expression(m.group('a'),
602                                          local_vars, allow_recursion)
603                 b = interpret_expression(m.group('b'),
604                                          local_vars, allow_recursion)
605                 return a % b
606
607             m = re.match(
608                 r'^(?P<func>[a-zA-Z]+)\((?P<args>[a-z0-9,]+)\)$', expr)
609             if m:
610                 fname = m.group('func')
611                 if fname not in functions:
612                     functions[fname] = extract_function(fname)
613                 argvals = [int(v) if v.isdigit() else local_vars[v]
614                            for v in m.group('args').split(',')]
615                 return functions[fname](argvals)
616             raise ExtractorError(u'Unsupported JS expression %r' % expr)
617
618         def extract_function(funcname):
619             func_m = re.search(
620                 r'function ' + re.escape(funcname) +
621                 r'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}',
622                 jscode)
623             argnames = func_m.group('args').split(',')
624
625             def resf(args):
626                 local_vars = dict(zip(argnames, args))
627                 for stmt in func_m.group('code').split(';'):
628                     res = interpret_statement(stmt, local_vars)
629                 return res
630             return resf
631
632         initial_function = extract_function(funcname)
633         return lambda s: initial_function([s])
634
635     def _parse_sig_swf(self, file_contents):
636         if file_contents[1:3] != b'WS':
637             raise ExtractorError(
638                 u'Not an SWF file; header is %r' % file_contents[:3])
639         if file_contents[:1] == b'C':
640             content = zlib.decompress(file_contents[8:])
641         else:
642             raise NotImplementedError(u'Unsupported compression format %r' %
643                                       file_contents[:1])
644
645         def extract_tags(content):
646             pos = 0
647             while pos < len(content):
648                 header16 = struct.unpack('<H', content[pos:pos+2])[0]
649                 pos += 2
650                 tag_code = header16 >> 6
651                 tag_len = header16 & 0x3f
652                 if tag_len == 0x3f:
653                     tag_len = struct.unpack('<I', content[pos:pos+4])[0]
654                     pos += 4
655                 assert pos+tag_len <= len(content)
656                 yield (tag_code, content[pos:pos+tag_len])
657                 pos += tag_len
658
659         code_tag = next(tag
660                         for tag_code, tag in extract_tags(content)
661                         if tag_code == 82)
662         p = code_tag.index(b'\0', 4) + 1
663         code_reader = io.BytesIO(code_tag[p:])
664
665         # Parse ABC (AVM2 ByteCode)
666         def read_int(reader=None):
667             if reader is None:
668                 reader = code_reader
669             res = 0
670             shift = 0
671             for _ in range(5):
672                 buf = reader.read(1)
673                 assert len(buf) == 1
674                 b = struct.unpack('<B', buf)[0]
675                 res = res | ((b & 0x7f) << shift)
676                 if b & 0x80 == 0:
677                     break
678                 shift += 7
679             return res
680
681         def u30(reader=None):
682             res = read_int(reader)
683             assert res & 0xf0000000 == 0
684             return res
685         u32 = read_int
686
687         def s32(reader=None):
688             v = read_int(reader)
689             if v & 0x80000000 != 0:
690                 v = - ((v ^ 0xffffffff) + 1)
691             return v
692
693         def read_string(reader=None):
694             if reader is None:
695                 reader = code_reader
696             slen = u30(reader)
697             resb = reader.read(slen)
698             assert len(resb) == slen
699             return resb.decode('utf-8')
700
701         def read_bytes(count, reader=None):
702             if reader is None:
703                 reader = code_reader
704             resb = reader.read(count)
705             assert len(resb) == count
706             return resb
707
708         def read_byte(reader=None):
709             resb = read_bytes(1, reader=reader)
710             res = struct.unpack('<B', resb)[0]
711             return res
712
713         # minor_version + major_version
714         read_bytes(2 + 2)
715
716         # Constant pool
717         int_count = u30()
718         for _c in range(1, int_count):
719             s32()
720         uint_count = u30()
721         for _c in range(1, uint_count):
722             u32()
723         double_count = u30()
724         read_bytes((double_count-1) * 8)
725         string_count = u30()
726         constant_strings = [u'']
727         for _c in range(1, string_count):
728             s = read_string()
729             constant_strings.append(s)
730         namespace_count = u30()
731         for _c in range(1, namespace_count):
732             read_bytes(1)  # kind
733             u30()  # name
734         ns_set_count = u30()
735         for _c in range(1, ns_set_count):
736             count = u30()
737             for _c2 in range(count):
738                 u30()
739         multiname_count = u30()
740         MULTINAME_SIZES = {
741             0x07: 2,  # QName
742             0x0d: 2,  # QNameA
743             0x0f: 1,  # RTQName
744             0x10: 1,  # RTQNameA
745             0x11: 0,  # RTQNameL
746             0x12: 0,  # RTQNameLA
747             0x09: 2,  # Multiname
748             0x0e: 2,  # MultinameA
749             0x1b: 1,  # MultinameL
750             0x1c: 1,  # MultinameLA
751         }
752         multinames = [u'']
753         for _c in range(1, multiname_count):
754             kind = u30()
755             assert kind in MULTINAME_SIZES, u'Invalid multiname kind %r' % kind
756             if kind == 0x07:
757                 u30()  # namespace_idx
758                 name_idx = u30()
759                 multinames.append(constant_strings[name_idx])
760             else:
761                 multinames.append('[MULTINAME kind: %d]' % kind)
762                 for _c2 in range(MULTINAME_SIZES[kind]):
763                     u30()
764
765         # Methods
766         method_count = u30()
767         MethodInfo = collections.namedtuple(
768             'MethodInfo',
769             ['NEED_ARGUMENTS', 'NEED_REST'])
770         method_infos = []
771         for method_id in range(method_count):
772             param_count = u30()
773             u30()  # return type
774             for _ in range(param_count):
775                 u30()  # param type
776             u30()  # name index (always 0 for youtube)
777             flags = read_byte()
778             if flags & 0x08 != 0:
779                 # Options present
780                 option_count = u30()
781                 for c in range(option_count):
782                     u30()  # val
783                     read_bytes(1)  # kind
784             if flags & 0x80 != 0:
785                 # Param names present
786                 for _ in range(param_count):
787                     u30()  # param name
788             mi = MethodInfo(flags & 0x01 != 0, flags & 0x04 != 0)
789             method_infos.append(mi)
790
791         # Metadata
792         metadata_count = u30()
793         for _c in range(metadata_count):
794             u30()  # name
795             item_count = u30()
796             for _c2 in range(item_count):
797                 u30()  # key
798                 u30()  # value
799
        def parse_traits_info():
            """Consume one trait entry from the current stream position.

            Reads the trait's name index and kind byte, skips the fields of
            the kind-specific body, and returns a dict describing any methods
            the trait declares (empty for slot/const/class traits).
            """
            trait_name_idx = u30()
            kind_full = read_byte()
            # Low nibble is the trait kind; high nibble carries attribute flags.
            kind = kind_full & 0x0f
            attrs = kind_full >> 4
            methods = {}
            if kind in [0x00, 0x06]:  # Slot or Const
                u30()  # Slot id
                u30()  # type_name_idx
                vindex = u30()
                if vindex != 0:
                    read_byte()  # vkind
            elif kind in [0x01, 0x02, 0x03]:  # Method / Getter / Setter
                u30()  # disp_id
                method_idx = u30()
                # Map the trait's name to the method index it refers to.
                methods[multinames[trait_name_idx]] = method_idx
            elif kind == 0x04:  # Class
                u30()  # slot_id
                u30()  # classi
            elif kind == 0x05:  # Function
                u30()  # slot_id
                function_idx = u30()
                # NOTE(review): key/value order here is inverted relative to the
                # method/getter/setter branch above (name -> index there,
                # index -> name here) — confirm this asymmetry is intentional.
                methods[function_idx] = multinames[trait_name_idx]
            else:
                raise ExtractorError(u'Unsupported trait kind %d' % kind)

            if attrs & 0x4 != 0:  # Metadata present
                metadata_count = u30()
                for _c3 in range(metadata_count):
                    u30()  # metadata index

            return methods
832
833         # Classes
834         TARGET_CLASSNAME = u'SignatureDecipher'
835         searched_idx = multinames.index(TARGET_CLASSNAME)
836         searched_class_id = None
837         class_count = u30()
838         for class_id in range(class_count):
839             name_idx = u30()
840             if name_idx == searched_idx:
841                 # We found the class we're looking for!
842                 searched_class_id = class_id
843             u30()  # super_name idx
844             flags = read_byte()
845             if flags & 0x08 != 0:  # Protected namespace is present
846                 u30()  # protected_ns_idx
847             intrf_count = u30()
848             for _c2 in range(intrf_count):
849                 u30()
850             u30()  # iinit
851             trait_count = u30()
852             for _c2 in range(trait_count):
853                 parse_traits_info()
854
855         if searched_class_id is None:
856             raise ExtractorError(u'Target class %r not found' %
857                                  TARGET_CLASSNAME)
858
859         method_names = {}
860         method_idxs = {}
861         for class_id in range(class_count):
862             u30()  # cinit
863             trait_count = u30()
864             for _c2 in range(trait_count):
865                 trait_methods = parse_traits_info()
866                 if class_id == searched_class_id:
867                     method_names.update(trait_methods.items())
868                     method_idxs.update(dict(
869                         (idx, name)
870                         for name, idx in trait_methods.items()))
871
872         # Scripts
873         script_count = u30()
874         for _c in range(script_count):
875             u30()  # init
876             trait_count = u30()
877             for _c2 in range(trait_count):
878                 parse_traits_info()
879
880         # Method bodies
881         method_body_count = u30()
882         Method = collections.namedtuple('Method', ['code', 'local_count'])
883         methods = {}
884         for _c in range(method_body_count):
885             method_idx = u30()
886             u30()  # max_stack
887             local_count = u30()
888             u30()  # init_scope_depth
889             u30()  # max_scope_depth
890             code_length = u30()
891             code = read_bytes(code_length)
892             if method_idx in method_idxs:
893                 m = Method(code, local_count)
894                 methods[method_idxs[method_idx]] = m
895             exception_count = u30()
896             for _c2 in range(exception_count):
897                 u30()  # from
898                 u30()  # to
899                 u30()  # target
900                 u30()  # exc_type
901                 u30()  # var_name
902             trait_count = u30()
903             for _c2 in range(trait_count):
904                 parse_traits_info()
905
906         assert p + code_reader.tell() == len(code_tag)
907         assert len(methods) == len(method_idxs)
908
909         method_pyfunctions = {}
910
        def extract_function(func_name):
            """Return a Python callable emulating the named AVM2 method.

            Results are memoized in method_pyfunctions; raises ExtractorError
            when func_name has no parsed method body.
            """
            if func_name in method_pyfunctions:
                return method_pyfunctions[func_name]
            if func_name not in methods:
                raise ExtractorError(u'Cannot find function %r' % func_name)
            m = methods[func_name]

            def resfunc(args):
                """Interpret m.code with *args* as the method's arguments.

                Implements only the small subset of AVM2 opcodes that appears
                in YouTube's signature-decipher methods.
                """
                # Register 0 is the receiver; the rest hold args then locals.
                registers = ['(this)'] + list(args) + [None] * m.local_count
                stack = []
                coder = io.BytesIO(m.code)
                while True:
                    opcode = struct.unpack('!B', coder.read(1))[0]
                    if opcode == 36:  # pushbyte
                        v = struct.unpack('!B', coder.read(1))[0]
                        stack.append(v)
                    elif opcode == 44:  # pushstring
                        idx = u30(coder)
                        stack.append(constant_strings[idx])
                    elif opcode == 48:  # pushscope
                        # We don't implement the scope register, so we'll just
                        # ignore the popped value
                        stack.pop()
                    elif opcode == 70:  # callproperty
                        index = u30(coder)
                        mname = multinames[index]
                        arg_count = u30(coder)
                        # Arguments were pushed left-to-right; pop restores order.
                        args = list(reversed(
                            [stack.pop() for _ in range(arg_count)]))
                        obj = stack.pop()
                        if mname == u'split':
                            assert len(args) == 1
                            assert isinstance(args[0], compat_str)
                            assert isinstance(obj, compat_str)
                            if args[0] == u'':
                                res = list(obj)
                            else:
                                res = obj.split(args[0])
                            stack.append(res)
                        elif mname == u'slice':
                            assert len(args) == 1
                            assert isinstance(args[0], int)
                            assert isinstance(obj, list)
                            res = obj[args[0]:]
                            stack.append(res)
                        elif mname == u'join':
                            assert len(args) == 1
                            assert isinstance(args[0], compat_str)
                            assert isinstance(obj, list)
                            res = args[0].join(obj)
                            stack.append(res)
                        elif mname in method_pyfunctions:
                            stack.append(method_pyfunctions[mname](args))
                        else:
                            raise NotImplementedError(
                                u'Unsupported property %r on %r'
                                % (mname, obj))
                    elif opcode == 72:  # returnvalue
                        res = stack.pop()
                        return res
                    elif opcode == 79:  # callpropvoid
                        index = u30(coder)
                        mname = multinames[index]
                        arg_count = u30(coder)
                        args = list(reversed(
                            [stack.pop() for _ in range(arg_count)]))
                        obj = stack.pop()
                        if mname == u'reverse':
                            assert isinstance(obj, list)
                            obj.reverse()
                        else:
                            raise NotImplementedError(
                                u'Unsupported (void) property %r on %r'
                                % (mname, obj))
                    elif opcode == 93:  # findpropstrict
                        index = u30(coder)
                        mname = multinames[index]
                        # Recursively compile the referenced method on demand.
                        res = extract_function(mname)
                        stack.append(res)
                    elif opcode == 97:  # setproperty
                        index = u30(coder)
                        value = stack.pop()
                        idx = stack.pop()
                        obj = stack.pop()
                        assert isinstance(obj, list)
                        assert isinstance(idx, int)
                        obj[idx] = value
                    elif opcode == 98:  # getlocal
                        index = u30(coder)
                        stack.append(registers[index])
                    elif opcode == 99:  # setlocal
                        index = u30(coder)
                        value = stack.pop()
                        registers[index] = value
                    elif opcode == 102:  # getproperty
                        index = u30(coder)
                        pname = multinames[index]
                        if pname == u'length':
                            obj = stack.pop()
                            assert isinstance(obj, list)
                            stack.append(len(obj))
                        else:  # Assume attribute access
                            idx = stack.pop()
                            assert isinstance(idx, int)
                            obj = stack.pop()
                            assert isinstance(obj, list)
                            stack.append(obj[idx])
                    elif opcode == 128:  # coerce
                        u30(coder)
                    elif opcode == 133:  # coerce_s
                        assert isinstance(stack[-1], (type(None), compat_str))
                    elif opcode == 164:  # modulo
                        value2 = stack.pop()
                        value1 = stack.pop()
                        res = value1 % value2
                        stack.append(res)
                    elif opcode == 208:  # getlocal_0
                        stack.append(registers[0])
                    elif opcode == 209:  # getlocal_1
                        stack.append(registers[1])
                    elif opcode == 210:  # getlocal_2
                        stack.append(registers[2])
                    elif opcode == 211:  # getlocal_3
                        stack.append(registers[3])
                    elif opcode == 214:  # setlocal_2
                        registers[2] = stack.pop()
                    elif opcode == 215:  # setlocal_3
                        registers[3] = stack.pop()
                    else:
                        raise NotImplementedError(
                            u'Unsupported opcode %d' % opcode)

            method_pyfunctions[func_name] = resfunc
            return resfunc
1045
1046         initial_function = extract_function(u'decipher')
1047         return lambda s: initial_function([s])
1048
1049     def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
1050         """Turn the encrypted s field into a working signature"""
1051
1052         if player_url is not None:
1053             try:
1054                 if player_url not in self._player_cache:
1055                     func = self._extract_signature_function(
1056                         video_id, player_url, len(s)
1057                     )
1058                     self._player_cache[player_url] = func
1059                 func = self._player_cache[player_url]
1060                 if self._downloader.params.get('youtube_print_sig_code'):
1061                     self._print_sig_code(func, len(s))
1062                 return func(s)
1063             except Exception:
1064                 tb = traceback.format_exc()
1065                 self._downloader.report_warning(
1066                     u'Automatic signature extraction failed: ' + tb)
1067
1068             self._downloader.report_warning(
1069                 u'Warning: Falling back to static signature algorithm')
1070         return self._static_decrypt_signature(
1071             s, video_id, player_url, age_gate)
1072
1073     def _static_decrypt_signature(self, s, video_id, player_url, age_gate):
1074         if age_gate:
1075             # The videos with age protection use another player, so the
1076             # algorithms can be different.
1077             if len(s) == 86:
1078                 return s[2:63] + s[82] + s[64:82] + s[63]
1079
1080         if len(s) == 93:
1081             return s[86:29:-1] + s[88] + s[28:5:-1]
1082         elif len(s) == 92:
1083             return s[25] + s[3:25] + s[0] + s[26:42] + s[79] + s[43:79] + s[91] + s[80:83]
1084         elif len(s) == 91:
1085             return s[84:27:-1] + s[86] + s[26:5:-1]
1086         elif len(s) == 90:
1087             return s[25] + s[3:25] + s[2] + s[26:40] + s[77] + s[41:77] + s[89] + s[78:81]
1088         elif len(s) == 89:
1089             return s[84:78:-1] + s[87] + s[77:60:-1] + s[0] + s[59:3:-1]
1090         elif len(s) == 88:
1091             return s[7:28] + s[87] + s[29:45] + s[55] + s[46:55] + s[2] + s[56:87] + s[28]
1092         elif len(s) == 87:
1093             return s[6:27] + s[4] + s[28:39] + s[27] + s[40:59] + s[2] + s[60:]
1094         elif len(s) == 86:
1095             return s[80:72:-1] + s[16] + s[71:39:-1] + s[72] + s[38:16:-1] + s[82] + s[15::-1]
1096         elif len(s) == 85:
1097             return s[3:11] + s[0] + s[12:55] + s[84] + s[56:84]
1098         elif len(s) == 84:
1099             return s[78:70:-1] + s[14] + s[69:37:-1] + s[70] + s[36:14:-1] + s[80] + s[:14][::-1]
1100         elif len(s) == 83:
1101             return s[81:64:-1] + s[82] + s[63:52:-1] + s[45] + s[51:45:-1] + s[1] + s[44:1:-1] + s[0]
1102         elif len(s) == 82:
1103             return s[80:73:-1] + s[81] + s[72:54:-1] + s[2] + s[53:43:-1] + s[0] + s[42:2:-1] + s[43] + s[1] + s[54]
1104         elif len(s) == 81:
1105             return s[56] + s[79:56:-1] + s[41] + s[55:41:-1] + s[80] + s[40:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
1106         elif len(s) == 80:
1107             return s[1:19] + s[0] + s[20:68] + s[19] + s[69:80]
1108         elif len(s) == 79:
1109             return s[54] + s[77:54:-1] + s[39] + s[53:39:-1] + s[78] + s[38:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
1110
1111         else:
1112             raise ExtractorError(u'Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s)))
1113
1114     def _get_available_subtitles(self, video_id):
1115         try:
1116             sub_list = self._download_webpage(
1117                 'http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
1118                 video_id, note=False)
1119         except ExtractorError as err:
1120             self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err))
1121             return {}
1122         lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list)
1123
1124         sub_lang_list = {}
1125         for l in lang_list:
1126             lang = l[1]
1127             params = compat_urllib_parse.urlencode({
1128                 'lang': lang,
1129                 'v': video_id,
1130                 'fmt': self._downloader.params.get('subtitlesformat'),
1131             })
1132             url = u'http://www.youtube.com/api/timedtext?' + params
1133             sub_lang_list[lang] = url
1134         if not sub_lang_list:
1135             self._downloader.report_warning(u'video doesn\'t have subtitles')
1136             return {}
1137         return sub_lang_list
1138
1139     def _get_available_automatic_caption(self, video_id, webpage):
1140         """We need the webpage for getting the captions url, pass it as an
1141            argument to speed up the process."""
1142         sub_format = self._downloader.params.get('subtitlesformat')
1143         self.to_screen(u'%s: Looking for automatic captions' % video_id)
1144         mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
1145         err_msg = u'Couldn\'t find automatic captions for %s' % video_id
1146         if mobj is None:
1147             self._downloader.report_warning(err_msg)
1148             return {}
1149         player_config = json.loads(mobj.group(1))
1150         try:
1151             args = player_config[u'args']
1152             caption_url = args[u'ttsurl']
1153             timestamp = args[u'timestamp']
1154             # We get the available subtitles
1155             list_params = compat_urllib_parse.urlencode({
1156                 'type': 'list',
1157                 'tlangs': 1,
1158                 'asrs': 1,
1159             })
1160             list_url = caption_url + '&' + list_params
1161             list_page = self._download_webpage(list_url, video_id)
1162             caption_list = xml.etree.ElementTree.fromstring(list_page.encode('utf-8'))
1163             original_lang_node = caption_list.find('track')
1164             if original_lang_node.attrib.get('kind') != 'asr' :
1165                 self._downloader.report_warning(u'Video doesn\'t have automatic captions')
1166                 return {}
1167             original_lang = original_lang_node.attrib['lang_code']
1168
1169             sub_lang_list = {}
1170             for lang_node in caption_list.findall('target'):
1171                 sub_lang = lang_node.attrib['lang_code']
1172                 params = compat_urllib_parse.urlencode({
1173                     'lang': original_lang,
1174                     'tlang': sub_lang,
1175                     'fmt': sub_format,
1176                     'ts': timestamp,
1177                     'kind': 'asr',
1178                 })
1179                 sub_lang_list[sub_lang] = caption_url + '&' + params
1180             return sub_lang_list
1181         # An extractor error can be raise by the download process if there are
1182         # no automatic captions but there are subtitles
1183         except (KeyError, ExtractorError):
1184             self._downloader.report_warning(err_msg)
1185             return {}
1186
1187     def _print_formats(self, formats):
1188         print('Available formats:')
1189         for x in formats:
1190             print('%s\t:\t%s\t[%s]%s' %(x, self._video_extensions.get(x, 'flv'),
1191                                         self._video_dimensions.get(x, '???'),
1192                                         ' ('+self._special_itags[x]+')' if x in self._special_itags else ''))
1193
1194     def _extract_id(self, url):
1195         mobj = re.match(self._VALID_URL, url, re.VERBOSE)
1196         if mobj is None:
1197             raise ExtractorError(u'Invalid URL: %s' % url)
1198         video_id = mobj.group(2)
1199         return video_id
1200
1201     def _get_video_url_list(self, url_map):
1202         """
1203         Transform a dictionary in the format {itag:url} to a list of (itag, url)
1204         with the requested formats.
1205         """
1206         req_format = self._downloader.params.get('format', None)
1207         format_limit = self._downloader.params.get('format_limit', None)
1208         available_formats = self._available_formats_prefer_free if self._downloader.params.get('prefer_free_formats', False) else self._available_formats
1209         if format_limit is not None and format_limit in available_formats:
1210             format_list = available_formats[available_formats.index(format_limit):]
1211         else:
1212             format_list = available_formats
1213         existing_formats = [x for x in format_list if x in url_map]
1214         if len(existing_formats) == 0:
1215             raise ExtractorError(u'no known formats available for video')
1216         if self._downloader.params.get('listformats', None):
1217             self._print_formats(existing_formats)
1218             return
1219         if req_format is None or req_format == 'best':
1220             video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
1221         elif req_format == 'worst':
1222             video_url_list = [(existing_formats[-1], url_map[existing_formats[-1]])] # worst quality
1223         elif req_format in ('-1', 'all'):
1224             video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
1225         else:
1226             # Specific formats. We pick the first in a slash-delimeted sequence.
1227             # Format can be specified as itag or 'mp4' or 'flv' etc. We pick the highest quality
1228             # available in the specified format. For example,
1229             # if '1/2/3/4' is requested and '2' and '4' are available, we pick '2'.
1230             # if '1/mp4/3/4' is requested and '1' and '5' (is a mp4) are available, we pick '1'.
1231             # if '1/mp4/3/4' is requested and '4' and '5' (is a mp4) are available, we pick '5'.
1232             req_formats = req_format.split('/')
1233             video_url_list = None
1234             for rf in req_formats:
1235                 if rf in url_map:
1236                     video_url_list = [(rf, url_map[rf])]
1237                     break
1238                 if rf in self._video_formats_map:
1239                     for srf in self._video_formats_map[rf]:
1240                         if srf in url_map:
1241                             video_url_list = [(srf, url_map[srf])]
1242                             break
1243                     else:
1244                         continue
1245                     break
1246             if video_url_list is None:
1247                 raise ExtractorError(u'requested format not available')
1248         return video_url_list
1249
1250     def _extract_from_m3u8(self, manifest_url, video_id):
1251         url_map = {}
1252         def _get_urls(_manifest):
1253             lines = _manifest.split('\n')
1254             urls = filter(lambda l: l and not l.startswith('#'),
1255                             lines)
1256             return urls
1257         manifest = self._download_webpage(manifest_url, video_id, u'Downloading formats manifest')
1258         formats_urls = _get_urls(manifest)
1259         for format_url in formats_urls:
1260             itag = self._search_regex(r'itag/(\d+?)/', format_url, 'itag')
1261             url_map[itag] = format_url
1262         return url_map
1263
1264     def _real_extract(self, url):
1265         if re.match(r'(?:https?://)?[^/]+/watch\?feature=[a-z_]+$', url):
1266             self._downloader.report_warning(u'Did you forget to quote the URL? Remember that & is a meta-character in most shells, so you want to put the URL in quotes, like  youtube-dl \'http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc\' (or simply  youtube-dl BaW_jenozKc  ).')
1267
1268         # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1269         mobj = re.search(self._NEXT_URL_RE, url)
1270         if mobj:
1271             url = 'https://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/')
1272         video_id = self._extract_id(url)
1273
1274         # Get video webpage
1275         self.report_video_webpage_download(video_id)
1276         url = 'https://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
1277         request = compat_urllib_request.Request(url)
1278         try:
1279             video_webpage_bytes = compat_urllib_request.urlopen(request).read()
1280         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1281             raise ExtractorError(u'Unable to download video webpage: %s' % compat_str(err))
1282
1283         video_webpage = video_webpage_bytes.decode('utf-8', 'ignore')
1284
1285         # Attempt to extract SWF player URL
1286         mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
1287         if mobj is not None:
1288             player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1289         else:
1290             player_url = None
1291
1292         # Get video info
1293         self.report_video_info_webpage_download(video_id)
1294         if re.search(r'player-age-gate-content">', video_webpage) is not None:
1295             self.report_age_confirmation()
1296             age_gate = True
1297             # We simulate the access to the video from www.youtube.com/v/{video_id}
1298             # this can be viewed without login into Youtube
1299             data = compat_urllib_parse.urlencode({'video_id': video_id,
1300                                                   'el': 'embedded',
1301                                                   'gl': 'US',
1302                                                   'hl': 'en',
1303                                                   'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1304                                                   'asv': 3,
1305                                                   'sts':'1588',
1306                                                   })
1307             video_info_url = 'https://www.youtube.com/get_video_info?' + data
1308             video_info_webpage = self._download_webpage(video_info_url, video_id,
1309                                     note=False,
1310                                     errnote='unable to download video info webpage')
1311             video_info = compat_parse_qs(video_info_webpage)
1312         else:
1313             age_gate = False
1314             for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
1315                 video_info_url = ('https://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
1316                         % (video_id, el_type))
1317                 video_info_webpage = self._download_webpage(video_info_url, video_id,
1318                                         note=False,
1319                                         errnote='unable to download video info webpage')
1320                 video_info = compat_parse_qs(video_info_webpage)
1321                 if 'token' in video_info:
1322                     break
1323         if 'token' not in video_info:
1324             if 'reason' in video_info:
1325                 raise ExtractorError(u'YouTube said: %s' % video_info['reason'][0], expected=True)
1326             else:
1327                 raise ExtractorError(u'"token" parameter not in video info for unknown reason')
1328
1329         # Check for "rental" videos
1330         if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
1331             raise ExtractorError(u'"rental" videos not supported')
1332
1333         # Start extracting information
1334         self.report_information_extraction(video_id)
1335
1336         # uploader
1337         if 'author' not in video_info:
1338             raise ExtractorError(u'Unable to extract uploader name')
1339         video_uploader = compat_urllib_parse.unquote_plus(video_info['author'][0])
1340
1341         # uploader_id
1342         video_uploader_id = None
1343         mobj = re.search(r'<link itemprop="url" href="http://www.youtube.com/(?:user|channel)/([^"]+)">', video_webpage)
1344         if mobj is not None:
1345             video_uploader_id = mobj.group(1)
1346         else:
1347             self._downloader.report_warning(u'unable to extract uploader nickname')
1348
1349         # title
1350         if 'title' not in video_info:
1351             raise ExtractorError(u'Unable to extract video title')
1352         video_title = compat_urllib_parse.unquote_plus(video_info['title'][0])
1353
1354         # thumbnail image
1355         # We try first to get a high quality image:
1356         m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
1357                             video_webpage, re.DOTALL)
1358         if m_thumb is not None:
1359             video_thumbnail = m_thumb.group(1)
1360         elif 'thumbnail_url' not in video_info:
1361             self._downloader.report_warning(u'unable to extract video thumbnail')
1362             video_thumbnail = ''
1363         else:   # don't panic if we can't find it
1364             video_thumbnail = compat_urllib_parse.unquote_plus(video_info['thumbnail_url'][0])
1365
1366         # upload date
1367         upload_date = None
1368         mobj = re.search(r'id="eow-date.*?>(.*?)</span>', video_webpage, re.DOTALL)
1369         if mobj is not None:
1370             upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
1371             upload_date = unified_strdate(upload_date)
1372
1373         # description
1374         video_description = get_element_by_id("eow-description", video_webpage)
1375         if video_description:
1376             video_description = clean_html(video_description)
1377         else:
1378             fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
1379             if fd_mobj:
1380                 video_description = unescapeHTML(fd_mobj.group(1))
1381             else:
1382                 video_description = u''
1383
1384         # subtitles
1385         video_subtitles = self.extract_subtitles(video_id, video_webpage)
1386
1387         if self._downloader.params.get('listsubtitles', False):
1388             self._list_available_subtitles(video_id, video_webpage)
1389             return
1390
1391         if 'length_seconds' not in video_info:
1392             self._downloader.report_warning(u'unable to extract video duration')
1393             video_duration = ''
1394         else:
1395             video_duration = compat_urllib_parse.unquote_plus(video_info['length_seconds'][0])
1396
1397         # Decide which formats to download
1398
1399         try:
1400             mobj = re.search(r';ytplayer.config = ({.*?});', video_webpage)
1401             if not mobj:
1402                 raise ValueError('Could not find vevo ID')
1403             info = json.loads(mobj.group(1))
1404             args = info['args']
1405             # Easy way to know if the 's' value is in url_encoded_fmt_stream_map
1406             # this signatures are encrypted
1407             m_s = re.search(r'[&,]s=', args['url_encoded_fmt_stream_map'])
1408             if m_s is not None:
1409                 self.to_screen(u'%s: Encrypted signatures detected.' % video_id)
1410                 video_info['url_encoded_fmt_stream_map'] = [args['url_encoded_fmt_stream_map']]
1411             m_s = re.search(r'[&,]s=', args.get('adaptive_fmts', u''))
1412             if m_s is not None:
1413                 if 'url_encoded_fmt_stream_map' in video_info:
1414                     video_info['url_encoded_fmt_stream_map'][0] += ',' + args['adaptive_fmts']
1415                 else:
1416                     video_info['url_encoded_fmt_stream_map'] = [args['adaptive_fmts']]
1417             elif 'adaptive_fmts' in video_info:
1418                 if 'url_encoded_fmt_stream_map' in video_info:
1419                     video_info['url_encoded_fmt_stream_map'][0] += ',' + video_info['adaptive_fmts'][0]
1420                 else:
1421                     video_info['url_encoded_fmt_stream_map'] = video_info['adaptive_fmts']
1422         except ValueError:
1423             pass
1424
1425         if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1426             self.report_rtmp_download()
1427             video_url_list = [(None, video_info['conn'][0])]
1428         elif 'url_encoded_fmt_stream_map' in video_info and len(video_info['url_encoded_fmt_stream_map']) >= 1:
1429             if 'rtmpe%3Dyes' in video_info['url_encoded_fmt_stream_map'][0]:
1430                 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
1431             url_map = {}
1432             for url_data_str in video_info['url_encoded_fmt_stream_map'][0].split(','):
1433                 url_data = compat_parse_qs(url_data_str)
1434                 if 'itag' in url_data and 'url' in url_data:
1435                     url = url_data['url'][0]
1436                     if 'sig' in url_data:
1437                         url += '&signature=' + url_data['sig'][0]
1438                     elif 's' in url_data:
1439                         encrypted_sig = url_data['s'][0]
1440                         if self._downloader.params.get('verbose'):
1441                             if age_gate:
1442                                 if player_url is None:
1443                                     player_version = 'unknown'
1444                                 else:
1445                                     player_version = self._search_regex(
1446                                         r'-(.+)\.swf$', player_url,
1447                                         u'flash player', fatal=False)
1448                                 player_desc = 'flash player %s' % player_version
1449                             else:
1450                                 player_version = self._search_regex(
1451                                     r'html5player-(.+?)\.js', video_webpage,
1452                                     'html5 player', fatal=False)
1453                                 player_desc = u'html5 player %s' % player_version
1454
1455                             parts_sizes = u'.'.join(compat_str(len(part)) for part in encrypted_sig.split('.'))
1456                             self.to_screen(u'encrypted signature length %d (%s), itag %s, %s' %
1457                                 (len(encrypted_sig), parts_sizes, url_data['itag'][0], player_desc))
1458
1459                         if not age_gate:
1460                             jsplayer_url_json = self._search_regex(
1461                                 r'"assets":.+?"js":\s*("[^"]+")',
1462                                 video_webpage, u'JS player URL')
1463                             player_url = json.loads(jsplayer_url_json)
1464
1465                         signature = self._decrypt_signature(
1466                             encrypted_sig, video_id, player_url, age_gate)
1467                         url += '&signature=' + signature
1468                     if 'ratebypass' not in url:
1469                         url += '&ratebypass=yes'
1470                     url_map[url_data['itag'][0]] = url
1471             video_url_list = self._get_video_url_list(url_map)
1472             if not video_url_list:
1473                 return
1474         elif video_info.get('hlsvp'):
1475             manifest_url = video_info['hlsvp'][0]
1476             url_map = self._extract_from_m3u8(manifest_url, video_id)
1477             video_url_list = self._get_video_url_list(url_map)
1478             if not video_url_list:
1479                 return
1480
1481         else:
1482             raise ExtractorError(u'no conn or url_encoded_fmt_stream_map information found in video info')
1483
1484         results = []
1485         for format_param, video_real_url in video_url_list:
1486             # Extension
1487             video_extension = self._video_extensions.get(format_param, 'flv')
1488
1489             video_format = '{0} - {1}{2}'.format(format_param if format_param else video_extension,
1490                                               self._video_dimensions.get(format_param, '???'),
1491                                               ' ('+self._special_itags[format_param]+')' if format_param in self._special_itags else '')
1492
1493             results.append({
1494                 'id':       video_id,
1495                 'url':      video_real_url,
1496                 'uploader': video_uploader,
1497                 'uploader_id': video_uploader_id,
1498                 'upload_date':  upload_date,
1499                 'title':    video_title,
1500                 'ext':      video_extension,
1501                 'format':   video_format,
1502                 'thumbnail':    video_thumbnail,
1503                 'description':  video_description,
1504                 'player_url':   player_url,
1505                 'subtitles':    video_subtitles,
1506                 'duration':     video_duration
1507             })
1508         return results
1509
class YoutubePlaylistIE(InfoExtractor):
    IE_DESC = u'YouTube.com playlists'
    _VALID_URL = r"""(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        youtube\.com/
                        (?:
                           (?:course|view_play_list|my_playlists|artist|playlist|watch)
                           \? (?:.*?&)*? (?:p|a|list)=
                        |  p/
                        )
                        ((?:PL|EC|UU|FL)?[0-9A-Za-z-_]{10,})
                        .*
                     |
                        ((?:PL|EC|UU|FL)[0-9A-Za-z-_]{10,})
                     )"""
    _TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/playlists/%s?max-results=%i&start-index=%i&v=2&alt=json&safeSearch=none'
    _MAX_RESULTS = 50
    IE_NAME = u'youtube:playlist'

    @classmethod
    def suitable(cls, url):
        """Receives a URL and returns True if suitable for this IE."""
        # _VALID_URL is written with whitespace/comments, so it needs VERBOSE.
        return re.match(cls._VALID_URL, url, re.VERBOSE) is not None

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url, re.VERBOSE)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        # The playlist id lands in group 1 or group 2 depending on URL shape
        playlist_id = mobj.group(1) or mobj.group(2)

        # Page through the gdata API collecting (position, watch URL) pairs
        indexed_videos = []
        for page_num in itertools.count(1):
            start_index = self._MAX_RESULTS * (page_num - 1) + 1
            if start_index >= 1000:
                # The API refuses start indices beyond 1000
                self._downloader.report_warning(u'Max number of results reached')
                break
            url = self._TEMPLATE_URL % (playlist_id, self._MAX_RESULTS, start_index)
            page = self._download_webpage(url, playlist_id, u'Downloading page #%s' % page_num)

            try:
                response = json.loads(page)
            except ValueError as err:
                raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))

            if 'feed' not in response:
                raise ExtractorError(u'Got a malformed response from YouTube API')
            playlist_title = response['feed']['title']['$t']
            if 'entry' not in response['feed']:
                # Number of videos is a multiple of self._MAX_RESULTS
                break

            for entry in response['feed']['entry']:
                index = entry['yt$position']['$t']
                if 'media$group' in entry and 'yt$videoid' in entry['media$group']:
                    watch_url = 'https://www.youtube.com/watch?v=' + entry['media$group']['yt$videoid']['$t']
                    indexed_videos.append((index, watch_url))

        # Sort by playlist position, then drop the index
        videos = [watch_url for (_, watch_url) in sorted(indexed_videos)]

        url_results = [self.url_result(vurl, 'Youtube') for vurl in videos]
        return [self.playlist_result(url_results, playlist_id, playlist_title)]
1577
1578
class YoutubeChannelIE(InfoExtractor):
    IE_DESC = u'YouTube.com channels'
    _VALID_URL = r"^(?:https?://)?(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/([0-9A-Za-z_-]+)"
    _TEMPLATE_URL = 'http://www.youtube.com/channel/%s/videos?sort=da&flow=list&view=0&page=%s&gl=US&hl=en'
    _MORE_PAGES_INDICATOR = 'yt-uix-load-more'
    _MORE_PAGES_URL = 'http://www.youtube.com/c4_browse_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s'
    IE_NAME = u'youtube:channel'

    def extract_videos_from_page(self, page):
        """Return the distinct video ids linked from a channel page, in order."""
        video_ids = []
        for mobj in re.finditer(r'href="/watch\?v=([0-9A-Za-z_-]+)&?', page):
            video_id = mobj.group(1)
            if video_id not in video_ids:
                video_ids.append(video_id)
        return video_ids

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        channel_id = mobj.group(1)

        # The first page is plain HTML from the channel's /videos listing
        pagenum = 1
        url = self._TEMPLATE_URL % (channel_id, pagenum)
        page = self._download_webpage(url, channel_id,
                                      u'Downloading page #%s' % pagenum)

        video_ids = self.extract_videos_from_page(page)

        # Any further pages come from the json-based channel_ajax endpoint
        if self._MORE_PAGES_INDICATOR in page:
            for pagenum in itertools.count(1):
                url = self._MORE_PAGES_URL % (pagenum, channel_id)
                page = self._download_webpage(url, channel_id,
                                              u'Downloading page #%s' % pagenum)

                page = json.loads(page)

                video_ids.extend(self.extract_videos_from_page(page['content_html']))

                # The load-more widget disappears once the last page is reached
                if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']:
                    break

        self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))

        urls = ['http://www.youtube.com/watch?v=%s' % video_id for video_id in video_ids]
        url_entries = [self.url_result(eurl, 'Youtube') for eurl in urls]
        return [self.playlist_result(url_entries, channel_id)]
1633
1634
class YoutubeUserIE(InfoExtractor):
    IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?)|ytuser:)(?!feed/)([A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
    _GDATA_PAGE_SIZE = 50
    _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'
    IE_NAME = u'youtube:user'

    @classmethod
    def suitable(cls, url):
        # The regex is too permissive: it would match URLs handled by other
        # youtube extractors, so let every other *IE take precedence.
        other_ies = (klass for (name, klass) in globals().items()
                     if name.endswith('IE') and klass is not cls)
        if any(ie.suitable(url) for ie in other_ies):
            return False
        return super(YoutubeUserIE, cls).suitable(url)

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        username = mobj.group(1)

        # The gdata API caps each response at _GDATA_PAGE_SIZE entries
        # (currently 50), so request successive pages until one comes back
        # short - that page must be the last one.
        video_ids = []

        for pagenum in itertools.count(0):
            start_index = pagenum * self._GDATA_PAGE_SIZE + 1

            gdata_url = self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index)
            page = self._download_webpage(gdata_url, username,
                                          u'Downloading video ids from %d to %d' % (start_index, start_index + self._GDATA_PAGE_SIZE))

            try:
                response = json.loads(page)
            except ValueError as err:
                raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))
            if 'entry' not in response['feed']:
                # Number of videos is a multiple of self._MAX_RESULTS
                break

            # The video id is the last path component of the entry id
            ids_in_page = [entry['id']['$t'].split('/')[-1]
                           for entry in response['feed']['entry']]
            video_ids.extend(ids_in_page)

            # A page that is not "full" is necessarily the last one;
            # skip the extra query that would come back empty.
            if len(ids_in_page) < self._GDATA_PAGE_SIZE:
                break

        urls = ['http://www.youtube.com/watch?v=%s' % video_id for video_id in video_ids]
        url_results = [self.url_result(rurl, 'Youtube') for rurl in urls]
        return [self.playlist_result(url_results, playlist_title=username)]
1699
class YoutubeSearchIE(SearchInfoExtractor):
    IE_DESC = u'YouTube.com searches'
    _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc'
    _MAX_RESULTS = 1000
    IE_NAME = u'youtube:search'
    _SEARCH_KEY = 'ytsearch'

    def report_download_page(self, query, pagenum):
        """Report attempt to download search page with given number."""
        self._downloader.to_screen(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""

        video_ids = []
        pagenum = 0
        # The effective limit shrinks once the API reports fewer total items
        limit = n

        while (50 * pagenum) < limit:
            self.report_download_page(query, pagenum + 1)
            result_url = self._API_URL % (compat_urllib_parse.quote_plus(query), (50 * pagenum) + 1)
            request = compat_urllib_request.Request(result_url)
            try:
                data = compat_urllib_request.urlopen(request).read().decode('utf-8')
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                raise ExtractorError(u'Unable to download API page: %s' % compat_str(err))
            api_response = json.loads(data)['data']

            if 'items' not in api_response:
                raise ExtractorError(u'[youtube] No video results')

            video_ids.extend(video['id'] for video in api_response['items'])

            limit = min(n, api_response['totalItems'])
            pagenum += 1

        # The last page may overshoot the requested count; truncate
        del video_ids[n:]
        videos = [self.url_result('http://www.youtube.com/watch?v=%s' % video_id, 'Youtube')
                  for video_id in video_ids]
        return self.playlist_result(videos, query)
1741
1742
class YoutubeShowIE(InfoExtractor):
    IE_DESC = u'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://www\.youtube\.com/show/(.*)'
    IE_NAME = u'youtube:show'

    def _real_extract(self, url):
        show_name = re.match(self._VALID_URL, url).group(1)
        webpage = self._download_webpage(url, show_name, u'Downloading show webpage')
        # Each season of the show is published as its own playlist; hand
        # every season URL over to the playlist extractor.
        m_seasons = list(re.finditer(r'href="(/playlist\?list=.*?)"', webpage))
        self.to_screen(u'%s: Found %s seasons' % (show_name, len(m_seasons)))
        return [self.url_result('https://www.youtube.com' + season.group(1), 'YoutubePlaylist')
                for season in m_seasons]
1756
1757
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Base class for extractors that fetch info from
    http://www.youtube.com/feed_ajax
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
    """
    _LOGIN_REQUIRED = True
    _PAGING_STEP = 30
    # use action_load_personal_feed instead of action_load_system_feed
    _PERSONAL_FEED = False

    @property
    def _FEED_TEMPLATE(self):
        # Personal feeds (e.g. watch later) use a different ajax action
        if self._PERSONAL_FEED:
            action = 'action_load_personal_feed'
        else:
            action = 'action_load_system_feed'
        return 'http://www.youtube.com/feed_ajax?%s=1&feed_name=%s&paging=%%s' % (action, self._FEED_NAME)

    @property
    def IE_NAME(self):
        return u'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        self._login()

    def _real_extract(self, url):
        feed_entries = []
        # The step argument is available only in 2.7 or higher
        for page_index in itertools.count(0):
            paging = page_index * self._PAGING_STEP
            info = self._download_webpage(self._FEED_TEMPLATE % paging,
                                          u'%s feed' % self._FEED_NAME,
                                          u'Downloading page %s' % page_index)
            info = json.loads(info)
            matches = re.finditer(r'"/watch\?v=(.*?)["&]', info['feed_html'])
            # orderedSet keeps only the first occurrence of each id
            video_ids = orderedSet(m.group(1) for m in matches)
            feed_entries.extend(self.url_result(video_id, 'Youtube')
                                for video_id in video_ids)
            if info['paging'] is None:
                break
        return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE)
1799
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    # Extractor for the logged-in user's subscriptions feed; all the
    # pagination/extraction logic lives in YoutubeFeedsInfoExtractor.
    IE_DESC = u'YouTube.com subscriptions feed, "ytsubs" keyword(requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = u'Youtube Subscriptions'
1805
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    # Extractor for the logged-in user's recommended-videos feed; all the
    # pagination/extraction logic lives in YoutubeFeedsInfoExtractor.
    IE_DESC = u'YouTube.com recommended videos, "ytrec" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?'
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = u'Youtube Recommended videos'
1811
class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor):
    # Extractor for the logged-in user's "Watch Later" list; all the
    # pagination/extraction logic lives in YoutubeFeedsInfoExtractor.
    IE_DESC = u'Youtube watch later list, "ytwatchlater" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/watch_later|:ytwatchlater'
    _FEED_NAME = 'watch_later'
    _PLAYLIST_TITLE = u'Youtube Watch Later'
    # Larger paging step than the base class default of 30
    _PAGING_STEP = 100
    # Watch later is a per-user feed: use action_load_personal_feed
    _PERSONAL_FEED = True
1819
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    IE_NAME = u'youtube:favorites'
    IE_DESC = u'YouTube.com favourite videos, "ytfav" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
    _LOGIN_REQUIRED = True

    def _real_extract(self, url):
        # The favourites page embeds the id of the backing playlist; defer
        # the actual extraction to the playlist extractor.
        page = self._download_webpage('https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        playlist_id = self._search_regex(r'list=(.+?)["&]', page, u'favourites playlist id')
        return self.url_result(playlist_id, 'YoutubePlaylist')