Merge remote-tracking branch 'rzhxeo/youtube'
[youtube-dl] / youtube_dl / extractor / youtube.py
1 # coding: utf-8
2
3 import collections
4 import errno
5 import io
6 import itertools
7 import json
8 import os.path
9 import re
10 import string
11 import struct
12 import traceback
13 import zlib
14
15 from .common import InfoExtractor, SearchInfoExtractor
16 from .subtitles import SubtitlesInfoExtractor
17 from ..utils import (
18     compat_chr,
19     compat_parse_qs,
20     compat_urllib_parse,
21     compat_urllib_request,
22     compat_urlparse,
23     compat_str,
24
25     clean_html,
26     get_cachedir,
27     get_element_by_id,
28     get_element_by_attribute,
29     ExtractorError,
30     unescapeHTML,
31     unified_strdate,
32     orderedSet,
33     write_json_file,
34 )
35
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _LANG_URL = r'https://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
    _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    def _set_language(self):
        """Request the English-language site; return True on success."""
        return bool(self._download_webpage(
            self._LANG_URL, None,
            note=u'Setting language', errnote='unable to set language',
            fatal=False))

    def _login(self):
        """Log in to YouTube with the configured credentials.

        Returns True on successful login and False otherwise (no
        credentials configured, download failure, or rejected login).
        Raises ExtractorError if _LOGIN_REQUIRED is set but no login
        info is available.
        """
        (username, password) = self._get_login_info()
        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED:
                raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            return False

        login_page = self._download_webpage(
            self._LOGIN_URL, None,
            note=u'Downloading login page',
            errnote=u'unable to fetch login page', fatal=False)
        if login_page is False:
            # Fix: was a bare `return` (None); return False for a
            # consistent boolean contract with the other failure paths.
            return False

        galx = self._search_regex(r'(?s)<input.+?name="GALX".+?value="(.+?)"',
                                  login_page, u'Login GALX parameter')

        # Log in
        login_form_strs = {
                u'continue': u'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
                u'Email': username,
                u'GALX': galx,
                u'Passwd': password,
                u'PersistentCookie': u'yes',
                u'_utf8': u'霱',
                u'bgresponse': u'js_disabled',
                u'checkConnection': u'',
                u'checkedDomains': u'youtube',
                u'dnConn': u'',
                u'pstMsg': u'0',
                u'rmShown': u'1',
                u'secTok': u'',
                u'signIn': u'Sign in',
                u'timeStmp': u'',
                u'service': u'youtube',
                u'uilel': u'3',
                u'hl': u'en_US',
        }
        # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
        # chokes on unicode
        login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k,v in login_form_strs.items())
        login_data = compat_urllib_parse.urlencode(login_form).encode('ascii')

        req = compat_urllib_request.Request(self._LOGIN_URL, login_data)
        login_results = self._download_webpage(
            req, None,
            note=u'Logging in', errnote=u'unable to log in', fatal=False)
        if login_results is False:
            return False
        # If the login form is still present, authentication was rejected.
        if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
            self._downloader.report_warning(u'unable to log in: bad username or password')
            return False
        return True

    def _confirm_age(self):
        """POST the age-verification confirmation form; return True."""
        age_form = {
            'next_url': '/',
            'action_confirm': 'Confirm',
        }
        req = compat_urllib_request.Request(self._AGE_URL, compat_urllib_parse.urlencode(age_form))

        self._download_webpage(
            req, None,
            note=u'Confirming age', errnote=u'Unable to confirm age')
        return True

    def _real_initialize(self):
        # Set language, log in and confirm age, bailing out early if any
        # step fails (each later step depends on the earlier ones).
        if self._downloader is None:
            return
        if not self._set_language():
            return
        if not self._login():
            return
        self._confirm_age()
127
128 class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
    IE_DESC = u'YouTube.com'
    # Matches watch/embed pages on the various YouTube hostnames, short
    # youtu.be links, protocol-relative (//) URLs, and even the naked
    # 11-character video ID (group 2 always captures the ID).
    _VALID_URL = r"""(?x)^
                     (
                         (?:https?://|//)?                                    # http(s):// or protocol-independent URL (optional)
                         (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|
                            tube\.majestyc\.net/|
                            youtube\.googleapis\.com/)                        # the various hostnames, with wildcard subdomains
                         (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
                         (?:                                                  # the various things that can precede the ID:
                             (?:(?:v|embed|e)/)                               # v/ or embed/ or e/
                             |(?:                                             # or the v= param in all its forms
                                 (?:(?:watch|movie)(?:_popup)?(?:\.php)?)?    # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
                                 (?:\?|\#!?)                                  # the params delimiter ? or # or #!
                                 (?:.*?&)?                                    # any other preceding param (like /?s=tuff&v=xxxx)
                                 v=
                             )
                         ))
                         |youtu\.be/                                          # just youtu.be/xxxx
                         )
                     )?                                                       # all until now is optional -> you can pass the naked ID
                     ([0-9A-Za-z_-]{11})                                      # here is it! the YouTube video ID
                     (?(1).+)?                                                # if we found the ID, everything can follow
                     $"""
    # Captures the next_url=... query parameter (age-gate style redirects);
    # the consumer of this pattern is not visible in this chunk.
    _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
    # Listed in order of quality
    _available_formats = ['38', '37', '46', '22', '45', '35', '44', '34', '18', '43', '6', '5', '36', '17', '13',
                          # Apple HTTP Live Streaming
                          '96', '95', '94', '93', '92', '132', '151',
                          # 3D
                          '85', '84', '102', '83', '101', '82', '100',
                          # Dash video
                          '138', '137', '248', '136', '247', '135', '246',
                          '245', '244', '134', '243', '133', '242', '160',
                          # Dash audio
                          '141', '172', '140', '171', '139',
                          ]
    # Maps itag (format code, as a string) to the container/file extension
    # used for that format.
    _video_extensions = {
        '13': '3gp',
        '17': '3gp',
        '18': 'mp4',
        '22': 'mp4',
        '36': '3gp',
        '37': 'mp4',
        '38': 'mp4',
        '43': 'webm',
        '44': 'webm',
        '45': 'webm',
        '46': 'webm',

        # 3d videos
        '82': 'mp4',
        '83': 'mp4',
        '84': 'mp4',
        '85': 'mp4',
        '100': 'webm',
        '101': 'webm',
        '102': 'webm',

        # Apple HTTP Live Streaming
        '92': 'mp4',
        '93': 'mp4',
        '94': 'mp4',
        '95': 'mp4',
        '96': 'mp4',
        '132': 'mp4',
        '151': 'mp4',

        # Dash mp4
        '133': 'mp4',
        '134': 'mp4',
        '135': 'mp4',
        '136': 'mp4',
        '137': 'mp4',
        '138': 'mp4',
        '160': 'mp4',

        # Dash mp4 audio
        '139': 'm4a',
        '140': 'm4a',
        '141': 'm4a',

        # Dash webm
        '171': 'webm',
        '172': 'webm',
        '242': 'webm',
        '243': 'webm',
        '244': 'webm',
        '245': 'webm',
        '246': 'webm',
        '247': 'webm',
        '248': 'webm',
    }
    # Maps itag to known 'width'/'height' (pixels) and/or a human-readable
    # 'display' label (resolution for video, bitrate for audio itags).
    # Empty dicts mean no dimension information is known for that itag.
    _video_dimensions = {
        '5': {'width': 400, 'height': 240},
        '6': {},
        '13': {},
        '17': {'width': 176, 'height': 144},
        '18': {'width': 640, 'height': 360},
        '22': {'width': 1280, 'height': 720},
        '34': {'width': 640, 'height': 360},
        '35': {'width': 854, 'height': 480},
        '36': {'width': 320, 'height': 240},
        '37': {'width': 1920, 'height': 1080},
        '38': {'width': 4096, 'height': 3072},
        '43': {'width': 640, 'height': 360},
        '44': {'width': 854, 'height': 480},
        '45': {'width': 1280, 'height': 720},
        '46': {'width': 1920, 'height': 1080},
        '82': {'height': 360, 'display': '360p'},
        '83': {'height': 480, 'display': '480p'},
        '84': {'height': 720, 'display': '720p'},
        '85': {'height': 1080, 'display': '1080p'},
        '92': {'height': 240, 'display': '240p'},
        '93': {'height': 360, 'display': '360p'},
        '94': {'height': 480, 'display': '480p'},
        '95': {'height': 720, 'display': '720p'},
        '96': {'height': 1080, 'display': '1080p'},
        '100': {'height': 360, 'display': '360p'},
        '101': {'height': 480, 'display': '480p'},
        '102': {'height': 720, 'display': '720p'},
        '132': {'height': 240, 'display': '240p'},
        '151': {'height': 72, 'display': '72p'},
        '133': {'height': 240, 'display': '240p'},
        '134': {'height': 360, 'display': '360p'},
        '135': {'height': 480, 'display': '480p'},
        '136': {'height': 720, 'display': '720p'},
        '137': {'height': 1080, 'display': '1080p'},
        '138': {'height': 1081, 'display': '>1080p'},
        '139': {'display': '48k'},
        '140': {'display': '128k'},
        '141': {'display': '256k'},
        '160': {'height': 192, 'display': '192p'},
        '171': {'display': '128k'},
        '172': {'display': '256k'},
        '242': {'height': 240, 'display': '240p'},
        '243': {'height': 360, 'display': '360p'},
        '244': {'height': 480, 'display': '480p'},
        '245': {'height': 480, 'display': '480p'},
        '246': {'height': 480, 'display': '480p'},
        '247': {'height': 720, 'display': '720p'},
        '248': {'height': 1080, 'display': '1080p'},
    }
    # Maps itag to a special-format tag ('3D', 'DASH Video', 'DASH Audio');
    # itags not listed here are ordinary muxed formats. The consumer of
    # this table is not visible in this chunk.
    _special_itags = {
        '82': '3D',
        '83': '3D',
        '84': '3D',
        '85': '3D',
        '100': '3D',
        '101': '3D',
        '102': '3D',
        '133': 'DASH Video',
        '134': 'DASH Video',
        '135': 'DASH Video',
        '136': 'DASH Video',
        '137': 'DASH Video',
        '138': 'DASH Video',
        '139': 'DASH Audio',
        '140': 'DASH Audio',
        '141': 'DASH Audio',
        '160': 'DASH Video',
        '171': 'DASH Audio',
        '172': 'DASH Audio',
        '242': 'DASH Video',
        '243': 'DASH Video',
        '244': 'DASH Video',
        '245': 'DASH Video',
        '246': 'DASH Video',
        '247': 'DASH Video',
        '248': 'DASH Video',
    }
299
    IE_NAME = u'youtube'
    # Test cases: each entry pairs a URL with the expected download
    # filename ('file') and expected metadata fields ('info_dict').
    _TESTS = [
        {
            u"url":  u"http://www.youtube.com/watch?v=BaW_jenozKc",
            u"file":  u"BaW_jenozKc.mp4",
            u"info_dict": {
                u"title": u"youtube-dl test video \"'/\\ä↭𝕐",
                u"uploader": u"Philipp Hagemeister",
                u"uploader_id": u"phihag",
                u"upload_date": u"20121002",
                u"description": u"test chars:  \"'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de ."
            }
        },
        {
            u"url":  u"http://www.youtube.com/watch?v=UxxajLWwzqY",
            u"file":  u"UxxajLWwzqY.mp4",
            u"note": u"Test generic use_cipher_signature video (#897)",
            u"info_dict": {
                u"upload_date": u"20120506",
                u"title": u"Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]",
                u"description": u"md5:5b292926389560516e384ac437c0ec07",
                u"uploader": u"Icona Pop",
                u"uploader_id": u"IconaPop"
            }
        },
        {
            u"url":  u"https://www.youtube.com/watch?v=07FYdnEawAQ",
            u"file":  u"07FYdnEawAQ.mp4",
            u"note": u"Test VEVO video with age protection (#956)",
            u"info_dict": {
                u"upload_date": u"20130703",
                u"title": u"Justin Timberlake - Tunnel Vision (Explicit)",
                u"description": u"md5:64249768eec3bc4276236606ea996373",
                u"uploader": u"justintimberlakeVEVO",
                u"uploader_id": u"justintimberlakeVEVO"
            }
        },
        {
            u"url":  u"//www.YouTube.com/watch?v=yZIXLfi8CZQ",
            u"file":  u"yZIXLfi8CZQ.mp4",
            u"note": u"Embed-only video (#1746)",
            u"info_dict": {
                u"upload_date": u"20120608",
                u"title": u"Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012",
                u"description": u"md5:09b78bd971f1e3e289601dfba15ca4f7",
                u"uploader": u"SET India",
                u"uploader_id": u"setindia"
            }
        },
    ]
350
351
352     @classmethod
353     def suitable(cls, url):
354         """Receives a URL and returns True if suitable for this IE."""
355         if YoutubePlaylistIE.suitable(url): return False
356         return re.match(cls._VALID_URL, url) is not None
357
    def __init__(self, *args, **kwargs):
        super(YoutubeIE, self).__init__(*args, **kwargs)
        # Per-instance cache dict, initialised empty. NOTE(review): no
        # consumer is visible in this chunk — presumably used by the
        # player/signature extraction code; confirm before relying on it.
        self._player_cache = {}
361
    def report_video_info_webpage_download(self, video_id):
        """Report attempt to download video info webpage."""
        self.to_screen(u'%s: Downloading video info webpage' % video_id)

    def report_information_extraction(self, video_id):
        """Report attempt to extract video information."""
        self.to_screen(u'%s: Extracting video information' % video_id)

    def report_unavailable_format(self, video_id, format):
        """Report that the requested format is not available."""
        self.to_screen(u'%s: Format %s not available' % (video_id, format))

    def report_rtmp_download(self):
        """Indicate the download will use the RTMP protocol."""
        self.to_screen(u'RTMP download detected')
377
    def _extract_signature_function(self, video_id, player_url, slen):
        """Obtain the signature-decryption function for a player.

        Derives the player type ('js' or 'swf') and id from *player_url*.
        If a cached permutation spec for (type, id, slen) exists on disk
        it is loaded; otherwise the player is downloaded and parsed, and
        the resulting permutation is written back to the cache
        (best-effort). Returns a callable that maps a signature string
        of length *slen* to its decrypted form.
        """
        id_m = re.match(r'.*-(?P<id>[a-zA-Z0-9_-]+)\.(?P<ext>[a-z]+)$',
                        player_url)
        player_type = id_m.group('ext')
        player_id = id_m.group('id')

        # Read from filesystem cache
        func_id = '%s_%s_%d' % (player_type, player_id, slen)
        # func_id becomes a filename component; ensure it cannot escape
        # the cache directory.
        assert os.path.basename(func_id) == func_id
        cache_dir = get_cachedir(self._downloader.params)

        cache_enabled = cache_dir is not None
        if cache_enabled:
            cache_fn = os.path.join(os.path.expanduser(cache_dir),
                                    u'youtube-sigfuncs',
                                    func_id + '.json')
            try:
                with io.open(cache_fn, 'r', encoding='utf-8') as cachef:
                    cache_spec = json.load(cachef)
                # The cached spec is a list of source indices: applying it
                # permutes the signature characters.
                return lambda s: u''.join(s[i] for i in cache_spec)
            except IOError:
                pass  # No cache available

        if player_type == 'js':
            code = self._download_webpage(
                player_url, video_id,
                note=u'Downloading %s player %s' % (player_type, player_id),
                errnote=u'Download of %s failed' % player_url)
            res = self._parse_sig_js(code)
        elif player_type == 'swf':
            urlh = self._request_webpage(
                player_url, video_id,
                note=u'Downloading %s player %s' % (player_type, player_id),
                errnote=u'Download of %s failed' % player_url)
            code = urlh.read()
            res = self._parse_sig_swf(code)
        else:
            assert False, 'Invalid player type %r' % player_type

        if cache_enabled:
            try:
                # Recover the permutation by running the function over a
                # probe string of distinct characters.
                test_string = u''.join(map(compat_chr, range(slen)))
                cache_res = res(test_string)
                cache_spec = [ord(c) for c in cache_res]
                try:
                    os.makedirs(os.path.dirname(cache_fn))
                except OSError as ose:
                    if ose.errno != errno.EEXIST:
                        raise
                write_json_file(cache_spec, cache_fn)
            except Exception:
                # Caching is best-effort; never fail extraction over it.
                tb = traceback.format_exc()
                self._downloader.report_warning(
                    u'Writing cache to %r failed: %s' % (cache_fn, tb))

        return res
434
435     def _print_sig_code(self, func, slen):
436         def gen_sig_code(idxs):
437             def _genslice(start, end, step):
438                 starts = u'' if start == 0 else str(start)
439                 ends = (u':%d' % (end+step)) if end + step >= 0 else u':'
440                 steps = u'' if step == 1 else (u':%d' % step)
441                 return u's[%s%s%s]' % (starts, ends, steps)
442
443             step = None
444             start = '(Never used)'  # Quelch pyflakes warnings - start will be
445                                     # set as soon as step is set
446             for i, prev in zip(idxs[1:], idxs[:-1]):
447                 if step is not None:
448                     if i - prev == step:
449                         continue
450                     yield _genslice(start, prev, step)
451                     step = None
452                     continue
453                 if i - prev in [-1, 1]:
454                     step = i - prev
455                     start = prev
456                     continue
457                 else:
458                     yield u's[%d]' % prev
459             if step is None:
460                 yield u's[%d]' % i
461             else:
462                 yield _genslice(start, i, step)
463
464         test_string = u''.join(map(compat_chr, range(slen)))
465         cache_res = func(test_string)
466         cache_spec = [ord(c) for c in cache_res]
467         expr_code = u' + '.join(gen_sig_code(cache_spec))
468         code = u'if len(s) == %d:\n    return %s\n' % (slen, expr_code)
469         self.to_screen(u'Extracted signature function:\n' + code)
470
471     def _parse_sig_js(self, jscode):
472         funcname = self._search_regex(
473             r'signature=([a-zA-Z]+)', jscode,
474             u'Initial JS player signature function name')
475
476         functions = {}
477
478         def argidx(varname):
479             return string.lowercase.index(varname)
480
481         def interpret_statement(stmt, local_vars, allow_recursion=20):
482             if allow_recursion < 0:
483                 raise ExtractorError(u'Recursion limit reached')
484
485             if stmt.startswith(u'var '):
486                 stmt = stmt[len(u'var '):]
487             ass_m = re.match(r'^(?P<out>[a-z]+)(?:\[(?P<index>[^\]]+)\])?' +
488                              r'=(?P<expr>.*)$', stmt)
489             if ass_m:
490                 if ass_m.groupdict().get('index'):
491                     def assign(val):
492                         lvar = local_vars[ass_m.group('out')]
493                         idx = interpret_expression(ass_m.group('index'),
494                                                    local_vars, allow_recursion)
495                         assert isinstance(idx, int)
496                         lvar[idx] = val
497                         return val
498                     expr = ass_m.group('expr')
499                 else:
500                     def assign(val):
501                         local_vars[ass_m.group('out')] = val
502                         return val
503                     expr = ass_m.group('expr')
504             elif stmt.startswith(u'return '):
505                 assign = lambda v: v
506                 expr = stmt[len(u'return '):]
507             else:
508                 raise ExtractorError(
509                     u'Cannot determine left side of statement in %r' % stmt)
510
511             v = interpret_expression(expr, local_vars, allow_recursion)
512             return assign(v)
513
514         def interpret_expression(expr, local_vars, allow_recursion):
515             if expr.isdigit():
516                 return int(expr)
517
518             if expr.isalpha():
519                 return local_vars[expr]
520
521             m = re.match(r'^(?P<in>[a-z]+)\.(?P<member>.*)$', expr)
522             if m:
523                 member = m.group('member')
524                 val = local_vars[m.group('in')]
525                 if member == 'split("")':
526                     return list(val)
527                 if member == 'join("")':
528                     return u''.join(val)
529                 if member == 'length':
530                     return len(val)
531                 if member == 'reverse()':
532                     return val[::-1]
533                 slice_m = re.match(r'slice\((?P<idx>.*)\)', member)
534                 if slice_m:
535                     idx = interpret_expression(
536                         slice_m.group('idx'), local_vars, allow_recursion-1)
537                     return val[idx:]
538
539             m = re.match(
540                 r'^(?P<in>[a-z]+)\[(?P<idx>.+)\]$', expr)
541             if m:
542                 val = local_vars[m.group('in')]
543                 idx = interpret_expression(m.group('idx'), local_vars,
544                                            allow_recursion-1)
545                 return val[idx]
546
547             m = re.match(r'^(?P<a>.+?)(?P<op>[%])(?P<b>.+?)$', expr)
548             if m:
549                 a = interpret_expression(m.group('a'),
550                                          local_vars, allow_recursion)
551                 b = interpret_expression(m.group('b'),
552                                          local_vars, allow_recursion)
553                 return a % b
554
555             m = re.match(
556                 r'^(?P<func>[a-zA-Z]+)\((?P<args>[a-z0-9,]+)\)$', expr)
557             if m:
558                 fname = m.group('func')
559                 if fname not in functions:
560                     functions[fname] = extract_function(fname)
561                 argvals = [int(v) if v.isdigit() else local_vars[v]
562                            for v in m.group('args').split(',')]
563                 return functions[fname](argvals)
564             raise ExtractorError(u'Unsupported JS expression %r' % expr)
565
566         def extract_function(funcname):
567             func_m = re.search(
568                 r'function ' + re.escape(funcname) +
569                 r'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}',
570                 jscode)
571             argnames = func_m.group('args').split(',')
572
573             def resf(args):
574                 local_vars = dict(zip(argnames, args))
575                 for stmt in func_m.group('code').split(';'):
576                     res = interpret_statement(stmt, local_vars)
577                 return res
578             return resf
579
580         initial_function = extract_function(funcname)
581         return lambda s: initial_function([s])
582
583     def _parse_sig_swf(self, file_contents):
584         if file_contents[1:3] != b'WS':
585             raise ExtractorError(
586                 u'Not an SWF file; header is %r' % file_contents[:3])
587         if file_contents[:1] == b'C':
588             content = zlib.decompress(file_contents[8:])
589         else:
590             raise NotImplementedError(u'Unsupported compression format %r' %
591                                       file_contents[:1])
592
593         def extract_tags(content):
594             pos = 0
595             while pos < len(content):
596                 header16 = struct.unpack('<H', content[pos:pos+2])[0]
597                 pos += 2
598                 tag_code = header16 >> 6
599                 tag_len = header16 & 0x3f
600                 if tag_len == 0x3f:
601                     tag_len = struct.unpack('<I', content[pos:pos+4])[0]
602                     pos += 4
603                 assert pos+tag_len <= len(content)
604                 yield (tag_code, content[pos:pos+tag_len])
605                 pos += tag_len
606
607         code_tag = next(tag
608                         for tag_code, tag in extract_tags(content)
609                         if tag_code == 82)
610         p = code_tag.index(b'\0', 4) + 1
611         code_reader = io.BytesIO(code_tag[p:])
612
613         # Parse ABC (AVM2 ByteCode)
614         def read_int(reader=None):
615             if reader is None:
616                 reader = code_reader
617             res = 0
618             shift = 0
619             for _ in range(5):
620                 buf = reader.read(1)
621                 assert len(buf) == 1
622                 b = struct.unpack('<B', buf)[0]
623                 res = res | ((b & 0x7f) << shift)
624                 if b & 0x80 == 0:
625                     break
626                 shift += 7
627             return res
628
629         def u30(reader=None):
630             res = read_int(reader)
631             assert res & 0xf0000000 == 0
632             return res
633         u32 = read_int
634
635         def s32(reader=None):
636             v = read_int(reader)
637             if v & 0x80000000 != 0:
638                 v = - ((v ^ 0xffffffff) + 1)
639             return v
640
641         def read_string(reader=None):
642             if reader is None:
643                 reader = code_reader
644             slen = u30(reader)
645             resb = reader.read(slen)
646             assert len(resb) == slen
647             return resb.decode('utf-8')
648
649         def read_bytes(count, reader=None):
650             if reader is None:
651                 reader = code_reader
652             resb = reader.read(count)
653             assert len(resb) == count
654             return resb
655
656         def read_byte(reader=None):
657             resb = read_bytes(1, reader=reader)
658             res = struct.unpack('<B', resb)[0]
659             return res
660
661         # minor_version + major_version
662         read_bytes(2 + 2)
663
664         # Constant pool
665         int_count = u30()
666         for _c in range(1, int_count):
667             s32()
668         uint_count = u30()
669         for _c in range(1, uint_count):
670             u32()
671         double_count = u30()
672         read_bytes((double_count-1) * 8)
673         string_count = u30()
674         constant_strings = [u'']
675         for _c in range(1, string_count):
676             s = read_string()
677             constant_strings.append(s)
678         namespace_count = u30()
679         for _c in range(1, namespace_count):
680             read_bytes(1)  # kind
681             u30()  # name
682         ns_set_count = u30()
683         for _c in range(1, ns_set_count):
684             count = u30()
685             for _c2 in range(count):
686                 u30()
687         multiname_count = u30()
688         MULTINAME_SIZES = {
689             0x07: 2,  # QName
690             0x0d: 2,  # QNameA
691             0x0f: 1,  # RTQName
692             0x10: 1,  # RTQNameA
693             0x11: 0,  # RTQNameL
694             0x12: 0,  # RTQNameLA
695             0x09: 2,  # Multiname
696             0x0e: 2,  # MultinameA
697             0x1b: 1,  # MultinameL
698             0x1c: 1,  # MultinameLA
699         }
700         multinames = [u'']
701         for _c in range(1, multiname_count):
702             kind = u30()
703             assert kind in MULTINAME_SIZES, u'Invalid multiname kind %r' % kind
704             if kind == 0x07:
705                 u30()  # namespace_idx
706                 name_idx = u30()
707                 multinames.append(constant_strings[name_idx])
708             else:
709                 multinames.append('[MULTINAME kind: %d]' % kind)
710                 for _c2 in range(MULTINAME_SIZES[kind]):
711                     u30()
712
713         # Methods
714         method_count = u30()
715         MethodInfo = collections.namedtuple(
716             'MethodInfo',
717             ['NEED_ARGUMENTS', 'NEED_REST'])
718         method_infos = []
719         for method_id in range(method_count):
720             param_count = u30()
721             u30()  # return type
722             for _ in range(param_count):
723                 u30()  # param type
724             u30()  # name index (always 0 for youtube)
725             flags = read_byte()
726             if flags & 0x08 != 0:
727                 # Options present
728                 option_count = u30()
729                 for c in range(option_count):
730                     u30()  # val
731                     read_bytes(1)  # kind
732             if flags & 0x80 != 0:
733                 # Param names present
734                 for _ in range(param_count):
735                     u30()  # param name
736             mi = MethodInfo(flags & 0x01 != 0, flags & 0x04 != 0)
737             method_infos.append(mi)
738
739         # Metadata
740         metadata_count = u30()
741         for _c in range(metadata_count):
742             u30()  # name
743             item_count = u30()
744             for _c2 in range(item_count):
745                 u30()  # key
746                 u30()  # value
747
        def parse_traits_info():
            """Consume one trait record from the ABC stream.

            Returns a dict of the methods declared by the trait: for
            Method/Getter/Setter traits it maps the trait's multiname to
            its method index.  All other trait kinds are read and
            discarded purely to keep the stream position in sync.
            """
            trait_name_idx = u30()
            kind_full = read_byte()
            # Low nibble encodes the trait kind, high nibble the attributes.
            kind = kind_full & 0x0f
            attrs = kind_full >> 4
            methods = {}
            if kind in [0x00, 0x06]:  # Slot or Const
                u30()  # Slot id
                u30()  # type_name_idx
                vindex = u30()
                if vindex != 0:
                    read_byte()  # vkind
            elif kind in [0x01, 0x02, 0x03]:  # Method / Getter / Setter
                u30()  # disp_id
                method_idx = u30()
                methods[multinames[trait_name_idx]] = method_idx
            elif kind == 0x04:  # Class
                u30()  # slot_id
                u30()  # classi
            elif kind == 0x05:  # Function
                u30()  # slot_id
                function_idx = u30()
                # NOTE(review): key/value order here ({idx: name}) is the
                # inverse of the Method/Getter/Setter branch ({name: idx})
                # above — confirm this asymmetry is intentional.
                methods[function_idx] = multinames[trait_name_idx]
            else:
                raise ExtractorError(u'Unsupported trait kind %d' % kind)

            if attrs & 0x4 != 0:  # Metadata present
                # Metadata is skipped; only the indices are consumed.
                metadata_count = u30()
                for _c3 in range(metadata_count):
                    u30()  # metadata index

            return methods
780
781         # Classes
782         TARGET_CLASSNAME = u'SignatureDecipher'
783         searched_idx = multinames.index(TARGET_CLASSNAME)
784         searched_class_id = None
785         class_count = u30()
786         for class_id in range(class_count):
787             name_idx = u30()
788             if name_idx == searched_idx:
789                 # We found the class we're looking for!
790                 searched_class_id = class_id
791             u30()  # super_name idx
792             flags = read_byte()
793             if flags & 0x08 != 0:  # Protected namespace is present
794                 u30()  # protected_ns_idx
795             intrf_count = u30()
796             for _c2 in range(intrf_count):
797                 u30()
798             u30()  # iinit
799             trait_count = u30()
800             for _c2 in range(trait_count):
801                 parse_traits_info()
802
803         if searched_class_id is None:
804             raise ExtractorError(u'Target class %r not found' %
805                                  TARGET_CLASSNAME)
806
807         method_names = {}
808         method_idxs = {}
809         for class_id in range(class_count):
810             u30()  # cinit
811             trait_count = u30()
812             for _c2 in range(trait_count):
813                 trait_methods = parse_traits_info()
814                 if class_id == searched_class_id:
815                     method_names.update(trait_methods.items())
816                     method_idxs.update(dict(
817                         (idx, name)
818                         for name, idx in trait_methods.items()))
819
820         # Scripts
821         script_count = u30()
822         for _c in range(script_count):
823             u30()  # init
824             trait_count = u30()
825             for _c2 in range(trait_count):
826                 parse_traits_info()
827
828         # Method bodies
829         method_body_count = u30()
830         Method = collections.namedtuple('Method', ['code', 'local_count'])
831         methods = {}
832         for _c in range(method_body_count):
833             method_idx = u30()
834             u30()  # max_stack
835             local_count = u30()
836             u30()  # init_scope_depth
837             u30()  # max_scope_depth
838             code_length = u30()
839             code = read_bytes(code_length)
840             if method_idx in method_idxs:
841                 m = Method(code, local_count)
842                 methods[method_idxs[method_idx]] = m
843             exception_count = u30()
844             for _c2 in range(exception_count):
845                 u30()  # from
846                 u30()  # to
847                 u30()  # target
848                 u30()  # exc_type
849                 u30()  # var_name
850             trait_count = u30()
851             for _c2 in range(trait_count):
852                 parse_traits_info()
853
854         assert p + code_reader.tell() == len(code_tag)
855         assert len(methods) == len(method_idxs)
856
857         method_pyfunctions = {}
858
        def extract_function(func_name):
            """Return a Python callable implementing the ABC (ActionScript
            bytecode) method named *func_name*.

            Only the opcode subset that YouTube's decipher routines use is
            interpreted.  Results are memoized in method_pyfunctions, which
            also allows deciphered methods to call each other (opcode 70).
            """
            if func_name in method_pyfunctions:
                return method_pyfunctions[func_name]
            if func_name not in methods:
                raise ExtractorError(u'Cannot find function %r' % func_name)
            m = methods[func_name]

            def resfunc(args):
                # Minimal AVM2 interpreter: register 0 is the implicit
                # "this", followed by the call arguments and locals.
                registers = ['(this)'] + list(args) + [None] * m.local_count
                stack = []
                coder = io.BytesIO(m.code)
                while True:
                    opcode = struct.unpack('!B', coder.read(1))[0]
                    if opcode == 36:  # pushbyte
                        v = struct.unpack('!B', coder.read(1))[0]
                        stack.append(v)
                    elif opcode == 44:  # pushstring
                        idx = u30(coder)
                        stack.append(constant_strings[idx])
                    elif opcode == 48:  # pushscope
                        # We don't implement the scope register, so we'll just
                        # ignore the popped value
                        stack.pop()
                    elif opcode == 70:  # callproperty
                        index = u30(coder)
                        mname = multinames[index]
                        arg_count = u30(coder)
                        # Arguments are pushed left-to-right, so pop in reverse.
                        args = list(reversed(
                            [stack.pop() for _ in range(arg_count)]))
                        obj = stack.pop()
                        if mname == u'split':
                            assert len(args) == 1
                            assert isinstance(args[0], compat_str)
                            assert isinstance(obj, compat_str)
                            if args[0] == u'':
                                res = list(obj)
                            else:
                                res = obj.split(args[0])
                            stack.append(res)
                        elif mname == u'slice':
                            assert len(args) == 1
                            assert isinstance(args[0], int)
                            assert isinstance(obj, list)
                            res = obj[args[0]:]
                            stack.append(res)
                        elif mname == u'join':
                            assert len(args) == 1
                            assert isinstance(args[0], compat_str)
                            assert isinstance(obj, list)
                            res = args[0].join(obj)
                            stack.append(res)
                        elif mname in method_pyfunctions:
                            stack.append(method_pyfunctions[mname](args))
                        else:
                            raise NotImplementedError(
                                u'Unsupported property %r on %r'
                                % (mname, obj))
                    elif opcode == 72:  # returnvalue
                        res = stack.pop()
                        return res
                    elif opcode == 79:  # callpropvoid
                        index = u30(coder)
                        mname = multinames[index]
                        arg_count = u30(coder)
                        args = list(reversed(
                            [stack.pop() for _ in range(arg_count)]))
                        obj = stack.pop()
                        if mname == u'reverse':
                            assert isinstance(obj, list)
                            obj.reverse()
                        else:
                            raise NotImplementedError(
                                u'Unsupported (void) property %r on %r'
                                % (mname, obj))
                    elif opcode == 93:  # findpropstrict
                        index = u30(coder)
                        mname = multinames[index]
                        # Recursively compile the referenced method.
                        res = extract_function(mname)
                        stack.append(res)
                    elif opcode == 97:  # setproperty
                        index = u30(coder)
                        value = stack.pop()
                        idx = stack.pop()
                        obj = stack.pop()
                        assert isinstance(obj, list)
                        assert isinstance(idx, int)
                        obj[idx] = value
                    elif opcode == 98:  # getlocal
                        index = u30(coder)
                        stack.append(registers[index])
                    elif opcode == 99:  # setlocal
                        index = u30(coder)
                        value = stack.pop()
                        registers[index] = value
                    elif opcode == 102:  # getproperty
                        index = u30(coder)
                        pname = multinames[index]
                        if pname == u'length':
                            obj = stack.pop()
                            assert isinstance(obj, list)
                            stack.append(len(obj))
                        else:  # Assume attribute access
                            idx = stack.pop()
                            assert isinstance(idx, int)
                            obj = stack.pop()
                            assert isinstance(obj, list)
                            stack.append(obj[idx])
                    elif opcode == 128:  # coerce
                        u30(coder)
                    elif opcode == 133:  # coerce_s
                        assert isinstance(stack[-1], (type(None), compat_str))
                    elif opcode == 164:  # modulo
                        value2 = stack.pop()
                        value1 = stack.pop()
                        res = value1 % value2
                        stack.append(res)
                    elif opcode == 208:  # getlocal_0
                        stack.append(registers[0])
                    elif opcode == 209:  # getlocal_1
                        stack.append(registers[1])
                    elif opcode == 210:  # getlocal_2
                        stack.append(registers[2])
                    elif opcode == 211:  # getlocal_3
                        stack.append(registers[3])
                    elif opcode == 214:  # setlocal_2
                        registers[2] = stack.pop()
                    elif opcode == 215:  # setlocal_3
                        registers[3] = stack.pop()
                    else:
                        raise NotImplementedError(
                            u'Unsupported opcode %d' % opcode)

            method_pyfunctions[func_name] = resfunc
            return resfunc
993
994         initial_function = extract_function(u'decipher')
995         return lambda s: initial_function([s])
996
997     def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
998         """Turn the encrypted s field into a working signature"""
999
1000         if player_url is not None:
1001             if player_url.startswith(u'//'):
1002                 player_url = u'https:' + player_url
1003             try:
1004                 player_id = (player_url, len(s))
1005                 if player_id not in self._player_cache:
1006                     func = self._extract_signature_function(
1007                         video_id, player_url, len(s)
1008                     )
1009                     self._player_cache[player_id] = func
1010                 func = self._player_cache[player_id]
1011                 if self._downloader.params.get('youtube_print_sig_code'):
1012                     self._print_sig_code(func, len(s))
1013                 return func(s)
1014             except Exception:
1015                 tb = traceback.format_exc()
1016                 self._downloader.report_warning(
1017                     u'Automatic signature extraction failed: ' + tb)
1018
1019             self._downloader.report_warning(
1020                 u'Warning: Falling back to static signature algorithm')
1021
1022         return self._static_decrypt_signature(
1023             s, video_id, player_url, age_gate)
1024
1025     def _static_decrypt_signature(self, s, video_id, player_url, age_gate):
1026         if age_gate:
1027             # The videos with age protection use another player, so the
1028             # algorithms can be different.
1029             if len(s) == 86:
1030                 return s[2:63] + s[82] + s[64:82] + s[63]
1031
1032         if len(s) == 93:
1033             return s[86:29:-1] + s[88] + s[28:5:-1]
1034         elif len(s) == 92:
1035             return s[25] + s[3:25] + s[0] + s[26:42] + s[79] + s[43:79] + s[91] + s[80:83]
1036         elif len(s) == 91:
1037             return s[84:27:-1] + s[86] + s[26:5:-1]
1038         elif len(s) == 90:
1039             return s[25] + s[3:25] + s[2] + s[26:40] + s[77] + s[41:77] + s[89] + s[78:81]
1040         elif len(s) == 89:
1041             return s[84:78:-1] + s[87] + s[77:60:-1] + s[0] + s[59:3:-1]
1042         elif len(s) == 88:
1043             return s[7:28] + s[87] + s[29:45] + s[55] + s[46:55] + s[2] + s[56:87] + s[28]
1044         elif len(s) == 87:
1045             return s[6:27] + s[4] + s[28:39] + s[27] + s[40:59] + s[2] + s[60:]
1046         elif len(s) == 86:
1047             return s[80:72:-1] + s[16] + s[71:39:-1] + s[72] + s[38:16:-1] + s[82] + s[15::-1]
1048         elif len(s) == 85:
1049             return s[3:11] + s[0] + s[12:55] + s[84] + s[56:84]
1050         elif len(s) == 84:
1051             return s[78:70:-1] + s[14] + s[69:37:-1] + s[70] + s[36:14:-1] + s[80] + s[:14][::-1]
1052         elif len(s) == 83:
1053             return s[80:63:-1] + s[0] + s[62:0:-1] + s[63]
1054         elif len(s) == 82:
1055             return s[80:37:-1] + s[7] + s[36:7:-1] + s[0] + s[6:0:-1] + s[37]
1056         elif len(s) == 81:
1057             return s[56] + s[79:56:-1] + s[41] + s[55:41:-1] + s[80] + s[40:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
1058         elif len(s) == 80:
1059             return s[1:19] + s[0] + s[20:68] + s[19] + s[69:80]
1060         elif len(s) == 79:
1061             return s[54] + s[77:54:-1] + s[39] + s[53:39:-1] + s[78] + s[38:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
1062
1063         else:
1064             raise ExtractorError(u'Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s)))
1065
1066     def _get_available_subtitles(self, video_id, webpage):
1067         try:
1068             sub_list = self._download_webpage(
1069                 'http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
1070                 video_id, note=False)
1071         except ExtractorError as err:
1072             self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err))
1073             return {}
1074         lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list)
1075
1076         sub_lang_list = {}
1077         for l in lang_list:
1078             lang = l[1]
1079             params = compat_urllib_parse.urlencode({
1080                 'lang': lang,
1081                 'v': video_id,
1082                 'fmt': self._downloader.params.get('subtitlesformat', 'srt'),
1083                 'name': l[0].encode('utf-8'),
1084             })
1085             url = u'http://www.youtube.com/api/timedtext?' + params
1086             sub_lang_list[lang] = url
1087         if not sub_lang_list:
1088             self._downloader.report_warning(u'video doesn\'t have subtitles')
1089             return {}
1090         return sub_lang_list
1091
1092     def _get_available_automatic_caption(self, video_id, webpage):
1093         """We need the webpage for getting the captions url, pass it as an
1094            argument to speed up the process."""
1095         sub_format = self._downloader.params.get('subtitlesformat', 'srt')
1096         self.to_screen(u'%s: Looking for automatic captions' % video_id)
1097         mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
1098         err_msg = u'Couldn\'t find automatic captions for %s' % video_id
1099         if mobj is None:
1100             self._downloader.report_warning(err_msg)
1101             return {}
1102         player_config = json.loads(mobj.group(1))
1103         try:
1104             args = player_config[u'args']
1105             caption_url = args[u'ttsurl']
1106             timestamp = args[u'timestamp']
1107             # We get the available subtitles
1108             list_params = compat_urllib_parse.urlencode({
1109                 'type': 'list',
1110                 'tlangs': 1,
1111                 'asrs': 1,
1112             })
1113             list_url = caption_url + '&' + list_params
1114             caption_list = self._download_xml(list_url, video_id)
1115             original_lang_node = caption_list.find('track')
1116             if original_lang_node is None or original_lang_node.attrib.get('kind') != 'asr' :
1117                 self._downloader.report_warning(u'Video doesn\'t have automatic captions')
1118                 return {}
1119             original_lang = original_lang_node.attrib['lang_code']
1120
1121             sub_lang_list = {}
1122             for lang_node in caption_list.findall('target'):
1123                 sub_lang = lang_node.attrib['lang_code']
1124                 params = compat_urllib_parse.urlencode({
1125                     'lang': original_lang,
1126                     'tlang': sub_lang,
1127                     'fmt': sub_format,
1128                     'ts': timestamp,
1129                     'kind': 'asr',
1130                 })
1131                 sub_lang_list[sub_lang] = caption_url + '&' + params
1132             return sub_lang_list
1133         # An extractor error can be raise by the download process if there are
1134         # no automatic captions but there are subtitles
1135         except (KeyError, ExtractorError):
1136             self._downloader.report_warning(err_msg)
1137             return {}
1138
1139     def _extract_id(self, url):
1140         mobj = re.match(self._VALID_URL, url, re.VERBOSE)
1141         if mobj is None:
1142             raise ExtractorError(u'Invalid URL: %s' % url)
1143         video_id = mobj.group(2)
1144         return video_id
1145
1146     def _get_video_url_list(self, url_map):
1147         """
1148         Transform a dictionary in the format {itag:url} to a list of (itag, url)
1149         with the requested formats.
1150         """
1151         existing_formats = [x for x in self._available_formats if x in url_map]
1152         if len(existing_formats) == 0:
1153             raise ExtractorError(u'no known formats available for video')
1154         video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
1155         video_url_list.reverse() # order worst to best
1156         return video_url_list
1157
1158     def _extract_from_m3u8(self, manifest_url, video_id):
1159         url_map = {}
1160         def _get_urls(_manifest):
1161             lines = _manifest.split('\n')
1162             urls = filter(lambda l: l and not l.startswith('#'),
1163                             lines)
1164             return urls
1165         manifest = self._download_webpage(manifest_url, video_id, u'Downloading formats manifest')
1166         formats_urls = _get_urls(manifest)
1167         for format_url in formats_urls:
1168             itag = self._search_regex(r'itag/(\d+?)/', format_url, 'itag')
1169             url_map[itag] = format_url
1170         return url_map
1171
1172     def _extract_annotations(self, video_id):
1173         url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id
1174         return self._download_webpage(url, video_id, note=u'Searching for annotations.', errnote=u'Unable to download video annotations.')
1175
1176     def _real_extract(self, url):
1177         # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1178         mobj = re.search(self._NEXT_URL_RE, url)
1179         if mobj:
1180             url = 'https://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/')
1181         video_id = self._extract_id(url)
1182
1183         # Get video webpage
1184         url = 'https://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
1185         video_webpage = self._download_webpage(url, video_id)
1186
1187         # Attempt to extract SWF player URL
1188         mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
1189         if mobj is not None:
1190             player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1191         else:
1192             player_url = None
1193
1194         # Get video info
1195         self.report_video_info_webpage_download(video_id)
1196         if re.search(r'player-age-gate-content">', video_webpage) is not None:
1197             self.report_age_confirmation()
1198             age_gate = True
1199             # We simulate the access to the video from www.youtube.com/v/{video_id}
1200             # this can be viewed without login into Youtube
1201             data = compat_urllib_parse.urlencode({'video_id': video_id,
1202                                                   'el': 'player_embedded',
1203                                                   'gl': 'US',
1204                                                   'hl': 'en',
1205                                                   'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1206                                                   'asv': 3,
1207                                                   'sts':'1588',
1208                                                   })
1209             video_info_url = 'https://www.youtube.com/get_video_info?' + data
1210             video_info_webpage = self._download_webpage(video_info_url, video_id,
1211                                     note=False,
1212                                     errnote='unable to download video info webpage')
1213             video_info = compat_parse_qs(video_info_webpage)
1214         else:
1215             age_gate = False
1216             for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
1217                 video_info_url = ('https://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
1218                         % (video_id, el_type))
1219                 video_info_webpage = self._download_webpage(video_info_url, video_id,
1220                                         note=False,
1221                                         errnote='unable to download video info webpage')
1222                 video_info = compat_parse_qs(video_info_webpage)
1223                 if 'token' in video_info:
1224                     break
1225         if 'token' not in video_info:
1226             if 'reason' in video_info:
1227                 raise ExtractorError(u'YouTube said: %s' % video_info['reason'][0], expected=True)
1228             else:
1229                 raise ExtractorError(u'"token" parameter not in video info for unknown reason')
1230
1231         if 'view_count' in video_info:
1232             view_count = int(video_info['view_count'][0])
1233         else:
1234             view_count = None
1235
1236         # Check for "rental" videos
1237         if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
1238             raise ExtractorError(u'"rental" videos not supported')
1239
1240         # Start extracting information
1241         self.report_information_extraction(video_id)
1242
1243         # uploader
1244         if 'author' not in video_info:
1245             raise ExtractorError(u'Unable to extract uploader name')
1246         video_uploader = compat_urllib_parse.unquote_plus(video_info['author'][0])
1247
1248         # uploader_id
1249         video_uploader_id = None
1250         mobj = re.search(r'<link itemprop="url" href="http://www.youtube.com/(?:user|channel)/([^"]+)">', video_webpage)
1251         if mobj is not None:
1252             video_uploader_id = mobj.group(1)
1253         else:
1254             self._downloader.report_warning(u'unable to extract uploader nickname')
1255
1256         # title
1257         if 'title' in video_info:
1258             video_title = compat_urllib_parse.unquote_plus(video_info['title'][0])
1259         else:
1260             self._downloader.report_warning(u'Unable to extract video title')
1261             video_title = u'_'
1262
1263         # thumbnail image
1264         # We try first to get a high quality image:
1265         m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
1266                             video_webpage, re.DOTALL)
1267         if m_thumb is not None:
1268             video_thumbnail = m_thumb.group(1)
1269         elif 'thumbnail_url' not in video_info:
1270             self._downloader.report_warning(u'unable to extract video thumbnail')
1271             video_thumbnail = None
1272         else:   # don't panic if we can't find it
1273             video_thumbnail = compat_urllib_parse.unquote_plus(video_info['thumbnail_url'][0])
1274
1275         # upload date
1276         upload_date = None
1277         mobj = re.search(r'id="eow-date.*?>(.*?)</span>', video_webpage, re.DOTALL)
1278         if mobj is not None:
1279             upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
1280             upload_date = unified_strdate(upload_date)
1281
1282         # description
1283         video_description = get_element_by_id("eow-description", video_webpage)
1284         if video_description:
1285             video_description = re.sub(r'''(?x)
1286                 <a\s+
1287                     (?:[a-zA-Z-]+="[^"]+"\s+)*?
1288                     title="([^"]+)"\s+
1289                     (?:[a-zA-Z-]+="[^"]+"\s+)*?
1290                     class="yt-uix-redirect-link"\s*>
1291                 [^<]+
1292                 </a>
1293             ''', r'\1', video_description)
1294             video_description = clean_html(video_description)
1295         else:
1296             fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
1297             if fd_mobj:
1298                 video_description = unescapeHTML(fd_mobj.group(1))
1299             else:
1300                 video_description = u''
1301
1302         def _extract_count(klass):
1303             count = self._search_regex(
1304                 r'class="%s">([\d,]+)</span>' % re.escape(klass),
1305                 video_webpage, klass, default=None)
1306             if count is not None:
1307                 return int(count.replace(',', ''))
1308             return None
1309         like_count = _extract_count(u'likes-count')
1310         dislike_count = _extract_count(u'dislikes-count')
1311
1312         # subtitles
1313         video_subtitles = self.extract_subtitles(video_id, video_webpage)
1314
1315         if self._downloader.params.get('listsubtitles', False):
1316             self._list_available_subtitles(video_id, video_webpage)
1317             return
1318
1319         if 'length_seconds' not in video_info:
1320             self._downloader.report_warning(u'unable to extract video duration')
1321             video_duration = None
1322         else:
1323             video_duration = int(compat_urllib_parse.unquote_plus(video_info['length_seconds'][0]))
1324
1325         # annotations
1326         video_annotations = None
1327         if self._downloader.params.get('writeannotations', False):
1328                 video_annotations = self._extract_annotations(video_id)
1329
1330         # Decide which formats to download
1331
1332         try:
1333             mobj = re.search(r';ytplayer.config = ({.*?});', video_webpage)
1334             if not mobj:
1335                 raise ValueError('Could not find vevo ID')
1336             info = json.loads(mobj.group(1))
1337             args = info['args']
1338             # Easy way to know if the 's' value is in url_encoded_fmt_stream_map
1339             # this signatures are encrypted
1340             if 'url_encoded_fmt_stream_map' not in args:
1341                 raise ValueError(u'No stream_map present')  # caught below
1342             re_signature = re.compile(r'[&,]s=')
1343             m_s = re_signature.search(args['url_encoded_fmt_stream_map'])
1344             if m_s is not None:
1345                 self.to_screen(u'%s: Encrypted signatures detected.' % video_id)
1346                 video_info['url_encoded_fmt_stream_map'] = [args['url_encoded_fmt_stream_map']]
1347             m_s = re_signature.search(args.get('adaptive_fmts', u''))
1348             if m_s is not None:
1349                 if 'adaptive_fmts' in video_info:
1350                     video_info['adaptive_fmts'][0] += ',' + args['adaptive_fmts']
1351                 else:
1352                     video_info['adaptive_fmts'] = [args['adaptive_fmts']]
1353         except ValueError:
1354             pass
1355
1356         if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1357             self.report_rtmp_download()
1358             video_url_list = [(None, video_info['conn'][0])]
1359         elif len(video_info.get('url_encoded_fmt_stream_map', [])) >= 1 or len(video_info.get('adaptive_fmts', [])) >= 1:
1360             encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts',[''])[0]
1361             if 'rtmpe%3Dyes' in encoded_url_map:
1362                 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
1363             url_map = {}
1364             for url_data_str in encoded_url_map.split(','):
1365                 url_data = compat_parse_qs(url_data_str)
1366                 if 'itag' in url_data and 'url' in url_data:
1367                     url = url_data['url'][0]
1368                     if 'sig' in url_data:
1369                         url += '&signature=' + url_data['sig'][0]
1370                     elif 's' in url_data:
1371                         encrypted_sig = url_data['s'][0]
1372                         if self._downloader.params.get('verbose'):
1373                             if age_gate:
1374                                 if player_url is None:
1375                                     player_version = 'unknown'
1376                                 else:
1377                                     player_version = self._search_regex(
1378                                         r'-(.+)\.swf$', player_url,
1379                                         u'flash player', fatal=False)
1380                                 player_desc = 'flash player %s' % player_version
1381                             else:
1382                                 player_version = self._search_regex(
1383                                     r'html5player-(.+?)\.js', video_webpage,
1384                                     'html5 player', fatal=False)
1385                                 player_desc = u'html5 player %s' % player_version
1386
1387                             parts_sizes = u'.'.join(compat_str(len(part)) for part in encrypted_sig.split('.'))
1388                             self.to_screen(u'encrypted signature length %d (%s), itag %s, %s' %
1389                                 (len(encrypted_sig), parts_sizes, url_data['itag'][0], player_desc))
1390
1391                         if not age_gate:
1392                             jsplayer_url_json = self._search_regex(
1393                                 r'"assets":.+?"js":\s*("[^"]+")',
1394                                 video_webpage, u'JS player URL')
1395                             player_url = json.loads(jsplayer_url_json)
1396
1397                         signature = self._decrypt_signature(
1398                             encrypted_sig, video_id, player_url, age_gate)
1399                         url += '&signature=' + signature
1400                     if 'ratebypass' not in url:
1401                         url += '&ratebypass=yes'
1402                     url_map[url_data['itag'][0]] = url
1403             video_url_list = self._get_video_url_list(url_map)
1404         elif video_info.get('hlsvp'):
1405             manifest_url = video_info['hlsvp'][0]
1406             url_map = self._extract_from_m3u8(manifest_url, video_id)
1407             video_url_list = self._get_video_url_list(url_map)
1408         else:
1409             raise ExtractorError(u'no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
1410
1411         formats = []
1412         for itag, video_real_url in video_url_list:
1413             # Extension
1414             video_extension = self._video_extensions.get(itag, 'flv')
1415             resolution = self._video_dimensions.get(itag, {}).get('display')
1416             width = self._video_dimensions.get(itag, {}).get('width')
1417             height = self._video_dimensions.get(itag, {}).get('height')
1418             note = self._special_itags.get(itag)
1419
1420             video_format = '{0} - {1}{2}'.format(itag if itag else video_extension,
1421                                               '%dx%d' % (width, height) if width is not None and height is not None else (resolution if resolution is not None else '???'),
1422                                               ' ('+self._special_itags[itag]+')' if itag in self._special_itags else '')
1423
1424             formats.append({
1425                 'url':         video_real_url,
1426                 'ext':         video_extension,
1427                 'format':      video_format,
1428                 'format_id':   itag,
1429                 'player_url':  player_url,
1430                 '_resolution': resolution,
1431                 'width':       width,
1432                 'height':      height,
1433                 'format_note': note,
1434             })
1435         def _formats_key(f):
1436             return (f.get('height') if f.get('height') is not None else -1,
1437                     f.get('width') if f.get('width') is not None else -1)
1438         formats = sorted(formats, key=_formats_key)
1439
1440         return {
1441             'id':           video_id,
1442             'uploader':     video_uploader,
1443             'uploader_id':  video_uploader_id,
1444             'upload_date':  upload_date,
1445             'title':        video_title,
1446             'thumbnail':    video_thumbnail,
1447             'description':  video_description,
1448             'subtitles':    video_subtitles,
1449             'duration':     video_duration,
1450             'age_limit':    18 if age_gate else 0,
1451             'annotations':  video_annotations,
1452             'webpage_url': 'https://www.youtube.com/watch?v=%s' % video_id,
1453             'view_count':   view_count,
1454             'like_count': like_count,
1455             'dislike_count': dislike_count,
1456             'formats':      formats,
1457         }
1458
class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
    """Extract all videos of a YouTube playlist (including mixes)."""
    IE_DESC = u'YouTube.com playlists'
    _VALID_URL = r"""(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        youtube\.com/
                        (?:
                           (?:course|view_play_list|my_playlists|artist|playlist|watch)
                           \? (?:.*?&)*? (?:p|a|list)=
                        |  p/
                        )
                        ((?:PL|EC|UU|FL|RD)?[0-9A-Za-z-_]{10,})
                        .*
                     |
                        ((?:PL|EC|UU|FL|RD)[0-9A-Za-z-_]{10,})
                     )"""
    _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s&page=%s'
    _MORE_PAGES_INDICATOR = r'data-link-type="next"'
    _VIDEO_RE = r'href="/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&amp;[^"]*?index=(?P<index>\d+)'
    IE_NAME = u'youtube:playlist'

    @classmethod
    def suitable(cls, url):
        """Receives a URL and returns True if suitable for this IE."""
        # _VALID_URL is written with re.VERBOSE, so the default suitable()
        # (which matches without that flag) cannot be used.
        return re.match(cls._VALID_URL, url, re.VERBOSE) is not None

    def _real_initialize(self):
        self._login()

    def _ids_to_results(self, ids):
        # Wrap each bare video id in an url_result so the Youtube IE
        # performs the actual video extraction later.
        return [self.url_result(vid_id, 'Youtube', video_id=vid_id)
                       for vid_id in ids]

    def _extract_mix(self, playlist_id):
        """Extract a mix playlist; these have no regular playlist pages."""
        # The mixes are generated from a single video
        # the id of the playlist is just 'RD' + video_id
        url = 'https://youtube.com/watch?v=%s&list=%s' % (playlist_id[-11:], playlist_id)
        webpage = self._download_webpage(url, playlist_id, u'Downloading Youtube mix')
        # The title markup differs between layouts; try both variants.
        title_span = (get_element_by_attribute('class', 'title long-title', webpage) or
            get_element_by_attribute('class', 'title ', webpage))
        title = clean_html(title_span)
        video_re = r'data-index="\d+".*?href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s' % re.escape(playlist_id)
        ids = orderedSet(re.findall(video_re, webpage))
        url_results = self._ids_to_results(ids)

        return self.playlist_result(url_results, playlist_id, title)

    def _real_extract(self, url):
        # Extract playlist id
        mobj = re.match(self._VALID_URL, url, re.VERBOSE)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        playlist_id = mobj.group(1) or mobj.group(2)

        # Check if it's a video-specific URL
        query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
        if 'v' in query_dict:
            video_id = query_dict['v'][0]
            if self._downloader.params.get('noplaylist'):
                self.to_screen(u'Downloading just video %s because of --no-playlist' % video_id)
                return self.url_result(video_id, 'Youtube', video_id=video_id)
            else:
                # playlist_id already contains its prefix (PL/EC/UU/...);
                # the message used to hard-code an extra 'PL' in front of it
                self.to_screen(u'Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))

        if playlist_id.startswith('RD'):
            # Mixes require a custom extraction process
            return self._extract_mix(playlist_id)
        if playlist_id.startswith('TL'):
            raise ExtractorError(u'For downloading YouTube.com top lists, use '
                u'the "yttoplist" keyword, for example "youtube-dl \'yttoplist:music:Top Tracks\'"', expected=True)

        # Extract the video ids from the playlist pages
        ids = []

        for page_num in itertools.count(1):
            url = self._TEMPLATE_URL % (playlist_id, page_num)
            page = self._download_webpage(url, playlist_id, u'Downloading page #%s' % page_num)
            matches = re.finditer(self._VIDEO_RE, page)
            # We remove the duplicates and the link with index 0
            # (it's not the first video of the playlist)
            new_ids = orderedSet(m.group('id') for m in matches if m.group('index') != '0')
            ids.extend(new_ids)

            if re.search(self._MORE_PAGES_INDICATOR, page) is None:
                break

        playlist_title = self._og_search_title(page)

        url_results = self._ids_to_results(ids)
        return self.playlist_result(url_results, playlist_id, playlist_title)
1549
1550
class YoutubeTopListIE(YoutubePlaylistIE):
    """Resolve a "yttoplist:{channel}:{title}" pseudo-URL to its playlist."""
    IE_NAME = u'youtube:toplist'
    IE_DESC = (u'YouTube.com top lists, "yttoplist:{channel}:{list title}"'
        u' (Example: "yttoplist:music:Top Tracks")')
    _VALID_URL = r'yttoplist:(?P<chann>.*?):(?P<title>.*?)$'

    def _real_extract(self, url):
        match = re.match(self._VALID_URL, url)
        channel_name = match.group('chann')
        list_title = match.group('title')
        # Find, on the channel page, the link whose query string carries
        # the requested list title.
        encoded_title = compat_urllib_parse.urlencode({'title': list_title})
        link_pattern = 'href="([^"]+?%s[^"]+?)"' % re.escape(encoded_title)
        channel_html = self._download_webpage(
            'https://www.youtube.com/%s' % channel_name, list_title)
        playlist_link = self._html_search_regex(link_pattern, channel_html, u'list')
        url = compat_urlparse.urljoin('https://www.youtube.com/', playlist_link)

        id_pattern = r'data-index="\d+".*?data-video-id="([0-9A-Za-z_-]{11})"'
        video_ids = []
        # sometimes the webpage doesn't contain the videos
        # retry until we get them
        attempt = 0
        while True:
            note = u'Downloading Youtube mix'
            if attempt > 0:
                note += ', retry #%d' % attempt
            page_html = self._download_webpage(url, list_title, note)
            video_ids = orderedSet(re.findall(id_pattern, page_html))
            if video_ids:
                break
            attempt += 1
        entries = self._ids_to_results(video_ids)
        return self.playlist_result(entries, playlist_title=list_title)
1581
1582
class YoutubeChannelIE(InfoExtractor):
    """Extract all videos of a YouTube channel."""
    IE_DESC = u'YouTube.com channels'
    _VALID_URL = r"^(?:https?://)?(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/([0-9A-Za-z_-]+)"
    _MORE_PAGES_INDICATOR = 'yt-uix-load-more'
    _MORE_PAGES_URL = 'http://www.youtube.com/c4_browse_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s'
    IE_NAME = u'youtube:channel'

    def extract_videos_from_page(self, page):
        """Return the video ids linked from *page*, deduplicated while
        preserving their first-seen order."""
        ids_in_page = []
        # Use a set for the membership test; the original scanned the
        # result list on every match (O(n^2) on large channels).
        seen = set()
        for mobj in re.finditer(r'href="/watch\?v=([0-9A-Za-z_-]+)&?', page):
            video_id = mobj.group(1)
            if video_id not in seen:
                seen.add(video_id)
                ids_in_page.append(video_id)
        return ids_in_page

    def _real_extract(self, url):
        # Extract channel id
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        # Download channel page
        channel_id = mobj.group(1)
        video_ids = []
        url = 'https://www.youtube.com/channel/%s/videos' % channel_id
        channel_page = self._download_webpage(url, channel_id)
        # Auto-generated channels serve all their videos on the first page
        # and their ajax pages are empty, so they need special handling.
        autogenerated = re.search(r'''(?x)
                class="[^"]*?(?:
                    channel-header-autogenerated-label|
                    yt-channel-title-autogenerated
                )[^"]*"''', channel_page) is not None

        if autogenerated:
            # The videos are contained in a single page
            # the ajax pages can't be used, they are empty
            video_ids = self.extract_videos_from_page(channel_page)
        else:
            # Download all channel pages using the json-based channel_ajax query
            for pagenum in itertools.count(1):
                url = self._MORE_PAGES_URL % (pagenum, channel_id)
                page = self._download_webpage(url, channel_id,
                                              u'Downloading page #%s' % pagenum)

                page = json.loads(page)

                ids_in_page = self.extract_videos_from_page(page['content_html'])
                video_ids.extend(ids_in_page)

                # The "load more" widget disappears on the last page.
                if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']:
                    break

        self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))

        url_entries = [self.url_result(video_id, 'Youtube', video_id=video_id)
                       for video_id in video_ids]
        return self.playlist_result(url_entries, channel_id)
1638
1639
class YoutubeUserIE(InfoExtractor):
    """Extract all uploads of a YouTube user via the paged GData API."""
    IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)([A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
    _GDATA_PAGE_SIZE = 50
    _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'
    IE_NAME = u'youtube:user'

    @classmethod
    def suitable(cls, url):
        # Don't return True if the url can be extracted with another youtube
        # extractor: this regex is too permissive and it would match them.
        other_ies = iter(klass for (name, klass) in globals().items() if name.endswith('IE') and klass is not cls)
        if any(ie.suitable(url) for ie in other_ies): return False
        else: return super(YoutubeUserIE, cls).suitable(url)

    def _real_extract(self, url):
        # Extract username
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        username = mobj.group(1)

        # Download video ids using YouTube Data API. Result size per
        # query is limited (currently to 50 videos) so we need to query
        # page by page until there are no video ids - it means we got
        # all of them.

        url_results = []

        for pagenum in itertools.count(0):
            start_index = pagenum * self._GDATA_PAGE_SIZE + 1

            gdata_url = self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index)
            page = self._download_webpage(gdata_url, username,
                                          u'Downloading video ids from %d to %d' % (start_index, start_index + self._GDATA_PAGE_SIZE))

            try:
                response = json.loads(page)
            except ValueError as err:
                raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))
            if 'entry' not in response['feed']:
                # Number of videos is a multiple of self._MAX_RESULTS
                break

            # Extract video identifiers
            entries = response['feed']['entry']
            for entry in entries:
                title = entry['title']['$t']
                video_id = entry['id']['$t'].split('/')[-1]
                url_results.append({
                    '_type': 'url',
                    'url': video_id,
                    'ie_key': 'Youtube',
                    # Fixed: this used to be the literal string 'video_id',
                    # so every entry carried the same bogus id.
                    'id': video_id,
                    'title': title,
                })

            # A little optimization - if current page is not
            # "full", ie. does not contain PAGE_SIZE video ids then
            # we can assume that this page is the last one - there
            # are no more ids on further pages - no need to query
            # again.

            if len(entries) < self._GDATA_PAGE_SIZE:
                break

        return self.playlist_result(url_results, playlist_title=username)
1709
1710
class YoutubeSearchIE(SearchInfoExtractor):
    """Search extractor backed by the GData video search API."""
    IE_DESC = u'YouTube.com searches'
    _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc'
    _MAX_RESULTS = 1000
    IE_NAME = u'youtube:search'
    _SEARCH_KEY = 'ytsearch'

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        collected_ids = []
        page_index = 0
        limit = n

        # Each API page holds at most 50 results; keep paging until we
        # have enough ids or the service runs out of results.
        while (50 * page_index) < limit:
            request_url = self._API_URL % (compat_urllib_parse.quote_plus(query), (50 * page_index) + 1)
            data_json = self._download_webpage(
                request_url, video_id=u'query "%s"' % query,
                note=u'Downloading page %s' % (page_index + 1),
                errnote=u'Unable to download API page')
            api_response = json.loads(data_json)['data']

            if 'items' not in api_response:
                raise ExtractorError(u'[youtube] No video results')

            collected_ids.extend(item['id'] for item in api_response['items'])

            # The service may report fewer total results than requested.
            limit = min(n, api_response['totalItems'])
            page_index += 1

        # The last page may push us past n; trim the excess.
        del collected_ids[n:]
        entries = [self.url_result(vid, 'Youtube', video_id=vid)
                   for vid in collected_ids]
        return self.playlist_result(entries, query)
1748
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Same as YoutubeSearchIE but orders results by upload date (newest first)."""
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    # Identical API endpoint, plus orderby=published.
    _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc&orderby=published'
    _SEARCH_KEY = 'ytsearchdate'
    IE_DESC = u'YouTube.com searches, newest videos first'
1754
class YoutubeShowIE(InfoExtractor):
    """Return one playlist result per season of a YouTube show."""
    IE_DESC = u'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://www\.youtube\.com/show/(.*)'
    IE_NAME = u'youtube:show'

    def _real_extract(self, url):
        show_name = re.match(self._VALID_URL, url).group(1)
        webpage = self._download_webpage(url, show_name, u'Downloading show webpage')
        # There's one playlist for each season of the show
        season_matches = list(re.finditer(r'href="(/playlist\?list=.*?)"', webpage))
        self.to_screen(u'%s: Found %s seasons' % (show_name, len(season_matches)))
        return [
            self.url_result('https://www.youtube.com' + season.group(1), 'YoutubePlaylist')
            for season in season_matches
        ]
1768
1769
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Base class for extractors that fetch info from
    http://www.youtube.com/feed_ajax
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
    """
    _LOGIN_REQUIRED = True
    # use action_load_personal_feed instead of action_load_system_feed
    _PERSONAL_FEED = False

    @property
    def _FEED_TEMPLATE(self):
        # Personal feeds (watch later, history) use a different ajax action.
        action = ('action_load_personal_feed' if self._PERSONAL_FEED
                  else 'action_load_system_feed')
        return 'http://www.youtube.com/feed_ajax?%s=1&feed_name=%s&paging=%%s' % (action, self._FEED_NAME)

    @property
    def IE_NAME(self):
        return u'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        self._login()

    def _real_extract(self, url):
        entries = []
        paging = 0
        for page_num in itertools.count(1):
            raw = self._download_webpage(self._FEED_TEMPLATE % paging,
                                          u'%s feed' % self._FEED_NAME,
                                          u'Downloading page %s' % page_num)
            info = json.loads(raw)
            # The response embeds the rendered feed as an HTML snippet;
            # pull the video ids out of its watch links.
            matches = re.finditer(r'"/watch\?v=(.*?)["&]', info['feed_html'])
            video_ids = orderedSet(m.group(1) for m in matches)
            entries.extend(
                self.url_result(video_id, 'Youtube', video_id=video_id)
                for video_id in video_ids)
            paging = info['paging']
            # A null paging token marks the final page.
            if paging is None:
                break
        return self.playlist_result(entries, playlist_title=self._PLAYLIST_TITLE)
1812
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the logged-in user's subscriptions feed."""
    IE_DESC = u'YouTube.com subscriptions feed, "ytsubs" keyword(requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = u'Youtube Subscriptions'
1818
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the logged-in user's recommended videos."""
    IE_DESC = u'YouTube.com recommended videos, "ytrec" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?'
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = u'Youtube Recommended videos'
1824
class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the logged-in user's Watch Later list."""
    IE_DESC = u'Youtube watch later list, "ytwatchlater" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/watch_later|:ytwatchlater'
    _FEED_NAME = 'watch_later'
    _PLAYLIST_TITLE = u'Youtube Watch Later'
    # Watch Later is a per-user list, so it needs the personal-feed action.
    _PERSONAL_FEED = True
1831
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the logged-in user's watch history."""
    IE_DESC = u'Youtube watch history, "ythistory" keyword (requires authentication)'
    # Fixed to a raw string for consistency with the sibling feed extractors;
    # the previous u'' literal relied on the invalid escape '\.' being passed
    # through unchanged (a deprecated behavior in later Python versions).
    _VALID_URL = r'https?://www\.youtube\.com/feed/history|:ythistory'
    _FEED_NAME = 'history'
    # History is a per-user list, so it needs the personal-feed action.
    _PERSONAL_FEED = True
    _PLAYLIST_TITLE = u'Youtube Watch History'
1838
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Delegate the user's favourites to the playlist extractor."""
    IE_NAME = u'youtube:favorites'
    IE_DESC = u'YouTube.com favourite videos, "ytfav" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
    _LOGIN_REQUIRED = True

    def _real_extract(self, url):
        # The favourites page embeds the id of the backing playlist;
        # extract it and hand over to YoutubePlaylistIE.
        page = self._download_webpage('https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        favourites_list_id = self._search_regex(r'list=(.+?)["&]', page, u'favourites playlist id')
        return self.url_result(favourites_list_id, 'YoutubePlaylist')
1849
1850
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch watch URLs whose v= parameter was lost to the shell."""
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'(?:https?://)?[^/]+/watch\?feature=[a-z_]+$'

    def _real_extract(self, url):
        # There is nothing to extract; explain how to quote the URL.
        message = (
            u'Did you forget to quote the URL? Remember that & is a meta '
            u'character in most shells, so you want to put the URL in quotes, '
            u'like  youtube-dl '
            u'\'http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc\''
            u' (or simply  youtube-dl BaW_jenozKc  ).')
        raise ExtractorError(message, expected=True)