[youtube] Video categories added to metadata
[youtube-dl] / youtube_dl / extractor / youtube.py
1 # coding: utf-8
2
3 import collections
4 import errno
5 import io
6 import itertools
7 import json
8 import os.path
9 import re
10 import struct
11 import traceback
12 import zlib
13
14 from .common import InfoExtractor, SearchInfoExtractor
15 from .subtitles import SubtitlesInfoExtractor
16 from ..jsinterp import JSInterpreter
17 from ..utils import (
18     compat_chr,
19     compat_parse_qs,
20     compat_urllib_parse,
21     compat_urllib_request,
22     compat_urlparse,
23     compat_str,
24
25     clean_html,
26     get_cachedir,
27     get_element_by_id,
28     get_element_by_attribute,
29     ExtractorError,
30     int_or_none,
31     PagedList,
32     unescapeHTML,
33     unified_strdate,
34     orderedSet,
35     write_json_file,
36     uppercase_escape,
37 )
38
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _LANG_URL = r'https://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
    _AGE_URL = 'https://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    def _set_language(self):
        """Force the site language to English; return True on success."""
        return bool(self._download_webpage(
            self._LANG_URL, None,
            note=u'Setting language', errnote='unable to set language',
            fatal=False))

    def _login(self):
        """Log in with the configured credentials.

        Returns True when login succeeded and False otherwise, including
        when no credentials are configured.  Raises ExtractorError when
        credentials are missing but _LOGIN_REQUIRED is set.
        """
        (username, password) = self._get_login_info()
        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED:
                raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            return False

        login_page = self._download_webpage(
            self._LOGIN_URL, None,
            note=u'Downloading login page',
            errnote=u'unable to fetch login page', fatal=False)
        if login_page is False:
            # Keep the return type consistent: every other failure path
            # returns False explicitly (this previously returned None).
            return False

        galx = self._search_regex(r'(?s)<input.+?name="GALX".+?value="(.+?)"',
                                  login_page, u'Login GALX parameter')

        # Log in
        login_form_strs = {
            u'continue': u'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
            u'Email': username,
            u'GALX': galx,
            u'Passwd': password,
            u'PersistentCookie': u'yes',
            u'_utf8': u'霱',
            u'bgresponse': u'js_disabled',
            u'checkConnection': u'',
            u'checkedDomains': u'youtube',
            u'dnConn': u'',
            u'pstMsg': u'0',
            u'rmShown': u'1',
            u'secTok': u'',
            u'signIn': u'Sign in',
            u'timeStmp': u'',
            u'service': u'youtube',
            u'uilel': u'3',
            u'hl': u'en_US',
        }
        # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
        # chokes on unicode
        login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k, v in login_form_strs.items())
        login_data = compat_urllib_parse.urlencode(login_form).encode('ascii')

        req = compat_urllib_request.Request(self._LOGIN_URL, login_data)
        login_results = self._download_webpage(
            req, None,
            note=u'Logging in', errnote=u'unable to log in', fatal=False)
        if login_results is False:
            return False
        # If the login form is still present, authentication was rejected.
        if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
            self._downloader.report_warning(u'unable to log in: bad username or password')
            return False
        return True

    def _confirm_age(self):
        """Submit the age confirmation form; always returns True."""
        age_form = {
            'next_url': '/',
            'action_confirm': 'Confirm',
        }
        req = compat_urllib_request.Request(self._AGE_URL,
            compat_urllib_parse.urlencode(age_form).encode('ascii'))

        self._download_webpage(
            req, None,
            note=u'Confirming age', errnote=u'Unable to confirm age')
        return True

    def _real_initialize(self):
        """Set the site language, log in and confirm age, in that order."""
        if self._downloader is None:
            return
        if not self._set_language():
            return
        if not self._login():
            return
        self._confirm_age()
130
131
class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
    IE_DESC = u'YouTube.com'
    # Matches watch pages, embeds, shortened youtu.be links, several mirror
    # domains, protocol-relative URLs and naked 11-character video IDs; the
    # video ID is always captured by the ([0-9A-Za-z_-]{11}) group.
    _VALID_URL = r"""(?x)^
                     (
                         (?:https?://|//)?                                    # http(s):// or protocol-independent URL (optional)
                         (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|
                            (?:www\.)?deturl\.com/www\.youtube\.com/|
                            (?:www\.)?pwnyoutube\.com/|
                            (?:www\.)?yourepeat\.com/|
                            tube\.majestyc\.net/|
                            youtube\.googleapis\.com/)                        # the various hostnames, with wildcard subdomains
                         (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
                         (?:                                                  # the various things that can precede the ID:
                             (?:(?:v|embed|e)/)                               # v/ or embed/ or e/
                             |(?:                                             # or the v= param in all its forms
                                 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)?  # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
                                 (?:\?|\#!?)                                  # the params delimiter ? or # or #!
                                 (?:.*?&)?                                    # any other preceding param (like /?s=tuff&v=xxxx)
                                 v=
                             )
                         ))
                         |youtu\.be/                                          # just youtu.be/xxxx
                         |https?://(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
                         )
                     )?                                                       # all until now is optional -> you can pass the naked ID
                     ([0-9A-Za-z_-]{11})                                      # here is it! the YouTube video ID
                     (?(1).+)?                                                # if we found the ID, everything can follow
                     $"""
    # Extracts the next_url query parameter from verify_age-style URLs.
    _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
    # Known itag -> format properties.  'preference' ranks variants of the
    # same quality: 3D (-20) < HLS (-10 relative offset) < DASH video (-40)
    # < DASH audio (-50), so plain progressive formats win by default.
    _formats = {
        '5': {'ext': 'flv', 'width': 400, 'height': 240},
        '6': {'ext': 'flv', 'width': 450, 'height': 270},
        '13': {'ext': '3gp'},
        '17': {'ext': '3gp', 'width': 176, 'height': 144},
        '18': {'ext': 'mp4', 'width': 640, 'height': 360},
        '22': {'ext': 'mp4', 'width': 1280, 'height': 720},
        '34': {'ext': 'flv', 'width': 640, 'height': 360},
        '35': {'ext': 'flv', 'width': 854, 'height': 480},
        '36': {'ext': '3gp', 'width': 320, 'height': 240},
        '37': {'ext': 'mp4', 'width': 1920, 'height': 1080},
        '38': {'ext': 'mp4', 'width': 4096, 'height': 3072},
        '43': {'ext': 'webm', 'width': 640, 'height': 360},
        '44': {'ext': 'webm', 'width': 854, 'height': 480},
        '45': {'ext': 'webm', 'width': 1280, 'height': 720},
        '46': {'ext': 'webm', 'width': 1920, 'height': 1080},


        # 3d videos
        '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'preference': -20},
        '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'preference': -20},
        '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'preference': -20},
        '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'preference': -20},
        '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'preference': -20},
        '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'preference': -20},
        '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'preference': -20},

        # Apple HTTP Live Streaming
        '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'preference': -10},
        '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'preference': -10},
        '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'preference': -10},
        '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'preference': -10},
        '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'preference': -10},
        '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'preference': -10},
        # NOTE(review): height 72 (not 720) — believed intentional (the
        # ultra-low-resolution HLS variant); confirm before "fixing".
        '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'preference': -10},

        # DASH mp4 video
        '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '138': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},

        # Dash mp4 audio
        '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 48, 'preference': -50},
        '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 128, 'preference': -50},
        '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 256, 'preference': -50},

        # Dash webm
        '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
        '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
        '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
        '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
        '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
        '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
        '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},

        # Dash webm audio
        '171': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 48, 'preference': -50},
        '172': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 256, 'preference': -50},

        # RTMP (unnamed)
        '_rtmp': {'protocol': 'rtmp'},
    }
234
    IE_NAME = u'youtube'
    # Test cases consumed by the download test suite; 'md5:...' values in
    # info_dict compare against the MD5 of the extracted field.
    _TESTS = [
        {
            u"url":  u"http://www.youtube.com/watch?v=BaW_jenozKc",
            u"file":  u"BaW_jenozKc.mp4",
            u"info_dict": {
                u"title": u"youtube-dl test video \"'/\\ä↭𝕐",
                u"uploader": u"Philipp Hagemeister",
                u"uploader_id": u"phihag",
                u"upload_date": u"20121002",
                u"description": u"test chars:  \"'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de ."
            }
        },
        {
            u"url":  u"http://www.youtube.com/watch?v=UxxajLWwzqY",
            u"file":  u"UxxajLWwzqY.mp4",
            u"note": u"Test generic use_cipher_signature video (#897)",
            u"info_dict": {
                u"upload_date": u"20120506",
                u"title": u"Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]",
                u"description": u"md5:fea86fda2d5a5784273df5c7cc994d9f",
                u"uploader": u"Icona Pop",
                u"uploader_id": u"IconaPop"
            }
        },
        {
            u"url":  u"https://www.youtube.com/watch?v=07FYdnEawAQ",
            u"file":  u"07FYdnEawAQ.mp4",
            u"note": u"Test VEVO video with age protection (#956)",
            u"info_dict": {
                u"upload_date": u"20130703",
                u"title": u"Justin Timberlake - Tunnel Vision (Explicit)",
                u"description": u"md5:64249768eec3bc4276236606ea996373",
                u"uploader": u"justintimberlakeVEVO",
                u"uploader_id": u"justintimberlakeVEVO"
            }
        },
        {
            u"url":  u"//www.YouTube.com/watch?v=yZIXLfi8CZQ",
            u"file":  u"yZIXLfi8CZQ.mp4",
            u"note": u"Embed-only video (#1746)",
            u"info_dict": {
                u"upload_date": u"20120608",
                u"title": u"Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012",
                u"description": u"md5:09b78bd971f1e3e289601dfba15ca4f7",
                u"uploader": u"SET India",
                u"uploader_id": u"setindia"
            }
        },
        {
            u"url": u"http://www.youtube.com/watch?v=a9LDPn-MO4I",
            u"file": u"a9LDPn-MO4I.m4a",
            u"note": u"256k DASH audio (format 141) via DASH manifest",
            u"info_dict": {
                u"upload_date": "20121002",
                u"uploader_id": "8KVIDEO",
                u"description": "No description available.",
                u"uploader": "8KVIDEO",
                u"title": "UHDTV TEST 8K VIDEO.mp4"
            },
            u"params": {
                u"youtube_include_dash_manifest": True,
                u"format": "141",
            },
        },
        # DASH manifest with encrypted signature
        {
            u'url': u'https://www.youtube.com/watch?v=IB3lcPjvWLA',
            u'info_dict': {
                u'id': u'IB3lcPjvWLA',
                u'ext': u'm4a',
                u'title': u'Afrojack - The Spark ft. Spree Wilson',
                u'description': u'md5:9717375db5a9a3992be4668bbf3bc0a8',
                u'uploader': u'AfrojackVEVO',
                u'uploader_id': u'AfrojackVEVO',
                u'upload_date': u'20131011',
            },
            u"params": {
                u'youtube_include_dash_manifest': True,
                u'format': '141',
            },
        },
    ]
318
319
320     @classmethod
321     def suitable(cls, url):
322         """Receives a URL and returns True if suitable for this IE."""
323         if YoutubePlaylistIE.suitable(url): return False
324         return re.match(cls._VALID_URL, url) is not None
325
326     def __init__(self, *args, **kwargs):
327         super(YoutubeIE, self).__init__(*args, **kwargs)
328         self._player_cache = {}
329
330     def report_video_info_webpage_download(self, video_id):
331         """Report attempt to download video info webpage."""
332         self.to_screen(u'%s: Downloading video info webpage' % video_id)
333
334     def report_information_extraction(self, video_id):
335         """Report attempt to extract video information."""
336         self.to_screen(u'%s: Extracting video information' % video_id)
337
338     def report_unavailable_format(self, video_id, format):
339         """Report extracted video URL."""
340         self.to_screen(u'%s: Format %s not available' % (video_id, format))
341
342     def report_rtmp_download(self):
343         """Indicate the download will use the RTMP protocol."""
344         self.to_screen(u'RTMP download detected')
345
    def _extract_signature_function(self, video_id, player_url, slen):
        """Build a signature-deciphering function for the given player.

        player_url points at a .js or .swf player; slen is the length of
        the scrambled signature.  The resulting character permutation is
        cached on disk (as a list of indices) so later runs can skip
        downloading and parsing the player.
        """
        id_m = re.match(r'.*-(?P<id>[a-zA-Z0-9_-]+)\.(?P<ext>[a-z]+)$',
                        player_url)
        player_type = id_m.group('ext')
        player_id = id_m.group('id')

        # Read from filesystem cache
        func_id = '%s_%s_%d' % (player_type, player_id, slen)
        # func_id becomes a filename component below; make sure it cannot
        # escape the cache directory.
        assert os.path.basename(func_id) == func_id
        cache_dir = get_cachedir(self._downloader.params)

        cache_enabled = cache_dir is not None
        if cache_enabled:
            cache_fn = os.path.join(os.path.expanduser(cache_dir),
                                    u'youtube-sigfuncs',
                                    func_id + '.json')
            try:
                with io.open(cache_fn, 'r', encoding='utf-8') as cachef:
                    cache_spec = json.load(cachef)
                # cache_spec is a permutation: character i of the result
                # comes from s[cache_spec[i]].
                return lambda s: u''.join(s[i] for i in cache_spec)
            except IOError:
                pass  # No cache available

        if player_type == 'js':
            code = self._download_webpage(
                player_url, video_id,
                note=u'Downloading %s player %s' % (player_type, player_id),
                errnote=u'Download of %s failed' % player_url)
            res = self._parse_sig_js(code)
        elif player_type == 'swf':
            urlh = self._request_webpage(
                player_url, video_id,
                note=u'Downloading %s player %s' % (player_type, player_id),
                errnote=u'Download of %s failed' % player_url)
            code = urlh.read()
            res = self._parse_sig_swf(code)
        else:
            assert False, 'Invalid player type %r' % player_type

        if cache_enabled:
            try:
                # Derive the permutation by running the function on a
                # probe string of distinct characters.
                test_string = u''.join(map(compat_chr, range(slen)))
                cache_res = res(test_string)
                cache_spec = [ord(c) for c in cache_res]
                try:
                    os.makedirs(os.path.dirname(cache_fn))
                except OSError as ose:
                    # The cache directory may already exist.
                    if ose.errno != errno.EEXIST:
                        raise
                write_json_file(cache_spec, cache_fn)
            except Exception:
                # Cache writing is best-effort; extraction already
                # succeeded, so only warn.
                tb = traceback.format_exc()
                self._downloader.report_warning(
                    u'Writing cache to %r failed: %s' % (cache_fn, tb))

        return res
402
403     def _print_sig_code(self, func, slen):
404         def gen_sig_code(idxs):
405             def _genslice(start, end, step):
406                 starts = u'' if start == 0 else str(start)
407                 ends = (u':%d' % (end+step)) if end + step >= 0 else u':'
408                 steps = u'' if step == 1 else (u':%d' % step)
409                 return u's[%s%s%s]' % (starts, ends, steps)
410
411             step = None
412             start = '(Never used)'  # Quelch pyflakes warnings - start will be
413                                     # set as soon as step is set
414             for i, prev in zip(idxs[1:], idxs[:-1]):
415                 if step is not None:
416                     if i - prev == step:
417                         continue
418                     yield _genslice(start, prev, step)
419                     step = None
420                     continue
421                 if i - prev in [-1, 1]:
422                     step = i - prev
423                     start = prev
424                     continue
425                 else:
426                     yield u's[%d]' % prev
427             if step is None:
428                 yield u's[%d]' % i
429             else:
430                 yield _genslice(start, i, step)
431
432         test_string = u''.join(map(compat_chr, range(slen)))
433         cache_res = func(test_string)
434         cache_spec = [ord(c) for c in cache_res]
435         expr_code = u' + '.join(gen_sig_code(cache_spec))
436         code = u'if len(s) == %d:\n    return %s\n' % (slen, expr_code)
437         self.to_screen(u'Extracted signature function:\n' + code)
438
439     def _parse_sig_js(self, jscode):
440         funcname = self._search_regex(
441             r'signature=([a-zA-Z]+)', jscode,
442              u'Initial JS player signature function name')
443
444         jsi = JSInterpreter(jscode)
445         initial_function = jsi.extract_function(funcname)
446         return lambda s: initial_function([s])
447
448     def _parse_sig_swf(self, file_contents):
449         if file_contents[1:3] != b'WS':
450             raise ExtractorError(
451                 u'Not an SWF file; header is %r' % file_contents[:3])
452         if file_contents[:1] == b'C':
453             content = zlib.decompress(file_contents[8:])
454         else:
455             raise NotImplementedError(u'Unsupported compression format %r' %
456                                       file_contents[:1])
457
458         def extract_tags(content):
459             pos = 0
460             while pos < len(content):
461                 header16 = struct.unpack('<H', content[pos:pos+2])[0]
462                 pos += 2
463                 tag_code = header16 >> 6
464                 tag_len = header16 & 0x3f
465                 if tag_len == 0x3f:
466                     tag_len = struct.unpack('<I', content[pos:pos+4])[0]
467                     pos += 4
468                 assert pos+tag_len <= len(content)
469                 yield (tag_code, content[pos:pos+tag_len])
470                 pos += tag_len
471
472         code_tag = next(tag
473                         for tag_code, tag in extract_tags(content)
474                         if tag_code == 82)
475         p = code_tag.index(b'\0', 4) + 1
476         code_reader = io.BytesIO(code_tag[p:])
477
478         # Parse ABC (AVM2 ByteCode)
479         def read_int(reader=None):
480             if reader is None:
481                 reader = code_reader
482             res = 0
483             shift = 0
484             for _ in range(5):
485                 buf = reader.read(1)
486                 assert len(buf) == 1
487                 b = struct.unpack('<B', buf)[0]
488                 res = res | ((b & 0x7f) << shift)
489                 if b & 0x80 == 0:
490                     break
491                 shift += 7
492             return res
493
494         def u30(reader=None):
495             res = read_int(reader)
496             assert res & 0xf0000000 == 0
497             return res
498         u32 = read_int
499
500         def s32(reader=None):
501             v = read_int(reader)
502             if v & 0x80000000 != 0:
503                 v = - ((v ^ 0xffffffff) + 1)
504             return v
505
506         def read_string(reader=None):
507             if reader is None:
508                 reader = code_reader
509             slen = u30(reader)
510             resb = reader.read(slen)
511             assert len(resb) == slen
512             return resb.decode('utf-8')
513
514         def read_bytes(count, reader=None):
515             if reader is None:
516                 reader = code_reader
517             resb = reader.read(count)
518             assert len(resb) == count
519             return resb
520
521         def read_byte(reader=None):
522             resb = read_bytes(1, reader=reader)
523             res = struct.unpack('<B', resb)[0]
524             return res
525
526         # minor_version + major_version
527         read_bytes(2 + 2)
528
529         # Constant pool
530         int_count = u30()
531         for _c in range(1, int_count):
532             s32()
533         uint_count = u30()
534         for _c in range(1, uint_count):
535             u32()
536         double_count = u30()
537         read_bytes((double_count-1) * 8)
538         string_count = u30()
539         constant_strings = [u'']
540         for _c in range(1, string_count):
541             s = read_string()
542             constant_strings.append(s)
543         namespace_count = u30()
544         for _c in range(1, namespace_count):
545             read_bytes(1)  # kind
546             u30()  # name
547         ns_set_count = u30()
548         for _c in range(1, ns_set_count):
549             count = u30()
550             for _c2 in range(count):
551                 u30()
552         multiname_count = u30()
553         MULTINAME_SIZES = {
554             0x07: 2,  # QName
555             0x0d: 2,  # QNameA
556             0x0f: 1,  # RTQName
557             0x10: 1,  # RTQNameA
558             0x11: 0,  # RTQNameL
559             0x12: 0,  # RTQNameLA
560             0x09: 2,  # Multiname
561             0x0e: 2,  # MultinameA
562             0x1b: 1,  # MultinameL
563             0x1c: 1,  # MultinameLA
564         }
565         multinames = [u'']
566         for _c in range(1, multiname_count):
567             kind = u30()
568             assert kind in MULTINAME_SIZES, u'Invalid multiname kind %r' % kind
569             if kind == 0x07:
570                 u30()  # namespace_idx
571                 name_idx = u30()
572                 multinames.append(constant_strings[name_idx])
573             else:
574                 multinames.append('[MULTINAME kind: %d]' % kind)
575                 for _c2 in range(MULTINAME_SIZES[kind]):
576                     u30()
577
578         # Methods
579         method_count = u30()
580         MethodInfo = collections.namedtuple(
581             'MethodInfo',
582             ['NEED_ARGUMENTS', 'NEED_REST'])
583         method_infos = []
584         for method_id in range(method_count):
585             param_count = u30()
586             u30()  # return type
587             for _ in range(param_count):
588                 u30()  # param type
589             u30()  # name index (always 0 for youtube)
590             flags = read_byte()
591             if flags & 0x08 != 0:
592                 # Options present
593                 option_count = u30()
594                 for c in range(option_count):
595                     u30()  # val
596                     read_bytes(1)  # kind
597             if flags & 0x80 != 0:
598                 # Param names present
599                 for _ in range(param_count):
600                     u30()  # param name
601             mi = MethodInfo(flags & 0x01 != 0, flags & 0x04 != 0)
602             method_infos.append(mi)
603
604         # Metadata
605         metadata_count = u30()
606         for _c in range(metadata_count):
607             u30()  # name
608             item_count = u30()
609             for _c2 in range(item_count):
610                 u30()  # key
611                 u30()  # value
612
613         def parse_traits_info():
614             trait_name_idx = u30()
615             kind_full = read_byte()
616             kind = kind_full & 0x0f
617             attrs = kind_full >> 4
618             methods = {}
619             if kind in [0x00, 0x06]:  # Slot or Const
620                 u30()  # Slot id
621                 u30()  # type_name_idx
622                 vindex = u30()
623                 if vindex != 0:
624                     read_byte()  # vkind
625             elif kind in [0x01, 0x02, 0x03]:  # Method / Getter / Setter
626                 u30()  # disp_id
627                 method_idx = u30()
628                 methods[multinames[trait_name_idx]] = method_idx
629             elif kind == 0x04:  # Class
630                 u30()  # slot_id
631                 u30()  # classi
632             elif kind == 0x05:  # Function
633                 u30()  # slot_id
634                 function_idx = u30()
635                 methods[function_idx] = multinames[trait_name_idx]
636             else:
637                 raise ExtractorError(u'Unsupported trait kind %d' % kind)
638
639             if attrs & 0x4 != 0:  # Metadata present
640                 metadata_count = u30()
641                 for _c3 in range(metadata_count):
642                     u30()  # metadata index
643
644             return methods
645
646         # Classes
647         TARGET_CLASSNAME = u'SignatureDecipher'
648         searched_idx = multinames.index(TARGET_CLASSNAME)
649         searched_class_id = None
650         class_count = u30()
651         for class_id in range(class_count):
652             name_idx = u30()
653             if name_idx == searched_idx:
654                 # We found the class we're looking for!
655                 searched_class_id = class_id
656             u30()  # super_name idx
657             flags = read_byte()
658             if flags & 0x08 != 0:  # Protected namespace is present
659                 u30()  # protected_ns_idx
660             intrf_count = u30()
661             for _c2 in range(intrf_count):
662                 u30()
663             u30()  # iinit
664             trait_count = u30()
665             for _c2 in range(trait_count):
666                 parse_traits_info()
667
668         if searched_class_id is None:
669             raise ExtractorError(u'Target class %r not found' %
670                                  TARGET_CLASSNAME)
671
672         method_names = {}
673         method_idxs = {}
674         for class_id in range(class_count):
675             u30()  # cinit
676             trait_count = u30()
677             for _c2 in range(trait_count):
678                 trait_methods = parse_traits_info()
679                 if class_id == searched_class_id:
680                     method_names.update(trait_methods.items())
681                     method_idxs.update(dict(
682                         (idx, name)
683                         for name, idx in trait_methods.items()))
684
685         # Scripts
686         script_count = u30()
687         for _c in range(script_count):
688             u30()  # init
689             trait_count = u30()
690             for _c2 in range(trait_count):
691                 parse_traits_info()
692
693         # Method bodies
694         method_body_count = u30()
695         Method = collections.namedtuple('Method', ['code', 'local_count'])
696         methods = {}
697         for _c in range(method_body_count):
698             method_idx = u30()
699             u30()  # max_stack
700             local_count = u30()
701             u30()  # init_scope_depth
702             u30()  # max_scope_depth
703             code_length = u30()
704             code = read_bytes(code_length)
705             if method_idx in method_idxs:
706                 m = Method(code, local_count)
707                 methods[method_idxs[method_idx]] = m
708             exception_count = u30()
709             for _c2 in range(exception_count):
710                 u30()  # from
711                 u30()  # to
712                 u30()  # target
713                 u30()  # exc_type
714                 u30()  # var_name
715             trait_count = u30()
716             for _c2 in range(trait_count):
717                 parse_traits_info()
718
719         assert p + code_reader.tell() == len(code_tag)
720         assert len(methods) == len(method_idxs)
721
722         method_pyfunctions = {}
723
724         def extract_function(func_name):
725             if func_name in method_pyfunctions:
726                 return method_pyfunctions[func_name]
727             if func_name not in methods:
728                 raise ExtractorError(u'Cannot find function %r' % func_name)
729             m = methods[func_name]
730
731             def resfunc(args):
732                 registers = ['(this)'] + list(args) + [None] * m.local_count
733                 stack = []
734                 coder = io.BytesIO(m.code)
735                 while True:
736                     opcode = struct.unpack('!B', coder.read(1))[0]
737                     if opcode == 36:  # pushbyte
738                         v = struct.unpack('!B', coder.read(1))[0]
739                         stack.append(v)
740                     elif opcode == 44:  # pushstring
741                         idx = u30(coder)
742                         stack.append(constant_strings[idx])
743                     elif opcode == 48:  # pushscope
744                         # We don't implement the scope register, so we'll just
745                         # ignore the popped value
746                         stack.pop()
747                     elif opcode == 70:  # callproperty
748                         index = u30(coder)
749                         mname = multinames[index]
750                         arg_count = u30(coder)
751                         args = list(reversed(
752                             [stack.pop() for _ in range(arg_count)]))
753                         obj = stack.pop()
754                         if mname == u'split':
755                             assert len(args) == 1
756                             assert isinstance(args[0], compat_str)
757                             assert isinstance(obj, compat_str)
758                             if args[0] == u'':
759                                 res = list(obj)
760                             else:
761                                 res = obj.split(args[0])
762                             stack.append(res)
763                         elif mname == u'slice':
764                             assert len(args) == 1
765                             assert isinstance(args[0], int)
766                             assert isinstance(obj, list)
767                             res = obj[args[0]:]
768                             stack.append(res)
769                         elif mname == u'join':
770                             assert len(args) == 1
771                             assert isinstance(args[0], compat_str)
772                             assert isinstance(obj, list)
773                             res = args[0].join(obj)
774                             stack.append(res)
775                         elif mname in method_pyfunctions:
776                             stack.append(method_pyfunctions[mname](args))
777                         else:
778                             raise NotImplementedError(
779                                 u'Unsupported property %r on %r'
780                                 % (mname, obj))
781                     elif opcode == 72:  # returnvalue
782                         res = stack.pop()
783                         return res
784                     elif opcode == 79:  # callpropvoid
785                         index = u30(coder)
786                         mname = multinames[index]
787                         arg_count = u30(coder)
788                         args = list(reversed(
789                             [stack.pop() for _ in range(arg_count)]))
790                         obj = stack.pop()
791                         if mname == u'reverse':
792                             assert isinstance(obj, list)
793                             obj.reverse()
794                         else:
795                             raise NotImplementedError(
796                                 u'Unsupported (void) property %r on %r'
797                                 % (mname, obj))
798                     elif opcode == 93:  # findpropstrict
799                         index = u30(coder)
800                         mname = multinames[index]
801                         res = extract_function(mname)
802                         stack.append(res)
803                     elif opcode == 97:  # setproperty
804                         index = u30(coder)
805                         value = stack.pop()
806                         idx = stack.pop()
807                         obj = stack.pop()
808                         assert isinstance(obj, list)
809                         assert isinstance(idx, int)
810                         obj[idx] = value
811                     elif opcode == 98:  # getlocal
812                         index = u30(coder)
813                         stack.append(registers[index])
814                     elif opcode == 99:  # setlocal
815                         index = u30(coder)
816                         value = stack.pop()
817                         registers[index] = value
818                     elif opcode == 102:  # getproperty
819                         index = u30(coder)
820                         pname = multinames[index]
821                         if pname == u'length':
822                             obj = stack.pop()
823                             assert isinstance(obj, list)
824                             stack.append(len(obj))
825                         else:  # Assume attribute access
826                             idx = stack.pop()
827                             assert isinstance(idx, int)
828                             obj = stack.pop()
829                             assert isinstance(obj, list)
830                             stack.append(obj[idx])
831                     elif opcode == 128:  # coerce
832                         u30(coder)
833                     elif opcode == 133:  # coerce_s
834                         assert isinstance(stack[-1], (type(None), compat_str))
835                     elif opcode == 164:  # modulo
836                         value2 = stack.pop()
837                         value1 = stack.pop()
838                         res = value1 % value2
839                         stack.append(res)
840                     elif opcode == 208:  # getlocal_0
841                         stack.append(registers[0])
842                     elif opcode == 209:  # getlocal_1
843                         stack.append(registers[1])
844                     elif opcode == 210:  # getlocal_2
845                         stack.append(registers[2])
846                     elif opcode == 211:  # getlocal_3
847                         stack.append(registers[3])
848                     elif opcode == 214:  # setlocal_2
849                         registers[2] = stack.pop()
850                     elif opcode == 215:  # setlocal_3
851                         registers[3] = stack.pop()
852                     else:
853                         raise NotImplementedError(
854                             u'Unsupported opcode %d' % opcode)
855
856             method_pyfunctions[func_name] = resfunc
857             return resfunc
858
859         initial_function = extract_function(u'decipher')
860         return lambda s: initial_function([s])
861
862     def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
863         """Turn the encrypted s field into a working signature"""
864
865         if player_url is not None:
866             if player_url.startswith(u'//'):
867                 player_url = u'https:' + player_url
868             try:
869                 player_id = (player_url, len(s))
870                 if player_id not in self._player_cache:
871                     func = self._extract_signature_function(
872                         video_id, player_url, len(s)
873                     )
874                     self._player_cache[player_id] = func
875                 func = self._player_cache[player_id]
876                 if self._downloader.params.get('youtube_print_sig_code'):
877                     self._print_sig_code(func, len(s))
878                 return func(s)
879             except Exception:
880                 tb = traceback.format_exc()
881                 self._downloader.report_warning(
882                     u'Automatic signature extraction failed: ' + tb)
883
884             self._downloader.report_warning(
885                 u'Warning: Falling back to static signature algorithm')
886
887         return self._static_decrypt_signature(
888             s, video_id, player_url, age_gate)
889
890     def _static_decrypt_signature(self, s, video_id, player_url, age_gate):
891         if age_gate:
892             # The videos with age protection use another player, so the
893             # algorithms can be different.
894             if len(s) == 86:
895                 return s[2:63] + s[82] + s[64:82] + s[63]
896
897         if len(s) == 93:
898             return s[86:29:-1] + s[88] + s[28:5:-1]
899         elif len(s) == 92:
900             return s[25] + s[3:25] + s[0] + s[26:42] + s[79] + s[43:79] + s[91] + s[80:83]
901         elif len(s) == 91:
902             return s[84:27:-1] + s[86] + s[26:5:-1]
903         elif len(s) == 90:
904             return s[25] + s[3:25] + s[2] + s[26:40] + s[77] + s[41:77] + s[89] + s[78:81]
905         elif len(s) == 89:
906             return s[84:78:-1] + s[87] + s[77:60:-1] + s[0] + s[59:3:-1]
907         elif len(s) == 88:
908             return s[7:28] + s[87] + s[29:45] + s[55] + s[46:55] + s[2] + s[56:87] + s[28]
909         elif len(s) == 87:
910             return s[6:27] + s[4] + s[28:39] + s[27] + s[40:59] + s[2] + s[60:]
911         elif len(s) == 86:
912             return s[80:72:-1] + s[16] + s[71:39:-1] + s[72] + s[38:16:-1] + s[82] + s[15::-1]
913         elif len(s) == 85:
914             return s[3:11] + s[0] + s[12:55] + s[84] + s[56:84]
915         elif len(s) == 84:
916             return s[78:70:-1] + s[14] + s[69:37:-1] + s[70] + s[36:14:-1] + s[80] + s[:14][::-1]
917         elif len(s) == 83:
918             return s[80:63:-1] + s[0] + s[62:0:-1] + s[63]
919         elif len(s) == 82:
920             return s[80:37:-1] + s[7] + s[36:7:-1] + s[0] + s[6:0:-1] + s[37]
921         elif len(s) == 81:
922             return s[56] + s[79:56:-1] + s[41] + s[55:41:-1] + s[80] + s[40:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
923         elif len(s) == 80:
924             return s[1:19] + s[0] + s[20:68] + s[19] + s[69:80]
925         elif len(s) == 79:
926             return s[54] + s[77:54:-1] + s[39] + s[53:39:-1] + s[78] + s[38:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
927
928         else:
929             raise ExtractorError(u'Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s)))
930
931     def _get_available_subtitles(self, video_id, webpage):
932         try:
933             sub_list = self._download_webpage(
934                 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
935                 video_id, note=False)
936         except ExtractorError as err:
937             self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err))
938             return {}
939         lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list)
940
941         sub_lang_list = {}
942         for l in lang_list:
943             lang = l[1]
944             params = compat_urllib_parse.urlencode({
945                 'lang': lang,
946                 'v': video_id,
947                 'fmt': self._downloader.params.get('subtitlesformat', 'srt'),
948                 'name': unescapeHTML(l[0]).encode('utf-8'),
949             })
950             url = u'https://www.youtube.com/api/timedtext?' + params
951             sub_lang_list[lang] = url
952         if not sub_lang_list:
953             self._downloader.report_warning(u'video doesn\'t have subtitles')
954             return {}
955         return sub_lang_list
956
957     def _get_available_automatic_caption(self, video_id, webpage):
958         """We need the webpage for getting the captions url, pass it as an
959            argument to speed up the process."""
960         sub_format = self._downloader.params.get('subtitlesformat', 'srt')
961         self.to_screen(u'%s: Looking for automatic captions' % video_id)
962         mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
963         err_msg = u'Couldn\'t find automatic captions for %s' % video_id
964         if mobj is None:
965             self._downloader.report_warning(err_msg)
966             return {}
967         player_config = json.loads(mobj.group(1))
968         try:
969             args = player_config[u'args']
970             caption_url = args[u'ttsurl']
971             timestamp = args[u'timestamp']
972             # We get the available subtitles
973             list_params = compat_urllib_parse.urlencode({
974                 'type': 'list',
975                 'tlangs': 1,
976                 'asrs': 1,
977             })
978             list_url = caption_url + '&' + list_params
979             caption_list = self._download_xml(list_url, video_id)
980             original_lang_node = caption_list.find('track')
981             if original_lang_node is None or original_lang_node.attrib.get('kind') != 'asr' :
982                 self._downloader.report_warning(u'Video doesn\'t have automatic captions')
983                 return {}
984             original_lang = original_lang_node.attrib['lang_code']
985
986             sub_lang_list = {}
987             for lang_node in caption_list.findall('target'):
988                 sub_lang = lang_node.attrib['lang_code']
989                 params = compat_urllib_parse.urlencode({
990                     'lang': original_lang,
991                     'tlang': sub_lang,
992                     'fmt': sub_format,
993                     'ts': timestamp,
994                     'kind': 'asr',
995                 })
996                 sub_lang_list[sub_lang] = caption_url + '&' + params
997             return sub_lang_list
998         # An extractor error can be raise by the download process if there are
999         # no automatic captions but there are subtitles
1000         except (KeyError, ExtractorError):
1001             self._downloader.report_warning(err_msg)
1002             return {}
1003
1004     @classmethod
1005     def extract_id(cls, url):
1006         mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
1007         if mobj is None:
1008             raise ExtractorError(u'Invalid URL: %s' % url)
1009         video_id = mobj.group(2)
1010         return video_id
1011
1012     def _extract_from_m3u8(self, manifest_url, video_id):
1013         url_map = {}
1014         def _get_urls(_manifest):
1015             lines = _manifest.split('\n')
1016             urls = filter(lambda l: l and not l.startswith('#'),
1017                             lines)
1018             return urls
1019         manifest = self._download_webpage(manifest_url, video_id, u'Downloading formats manifest')
1020         formats_urls = _get_urls(manifest)
1021         for format_url in formats_urls:
1022             itag = self._search_regex(r'itag/(\d+?)/', format_url, 'itag')
1023             url_map[itag] = format_url
1024         return url_map
1025
1026     def _extract_annotations(self, video_id):
1027         url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id
1028         return self._download_webpage(url, video_id, note=u'Searching for annotations.', errnote=u'Unable to download video annotations.')
1029
1030     def _real_extract(self, url):
1031         proto = (
1032             u'http' if self._downloader.params.get('prefer_insecure', False)
1033             else u'https')
1034
1035         # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1036         mobj = re.search(self._NEXT_URL_RE, url)
1037         if mobj:
1038             url = proto + '://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/')
1039         video_id = self.extract_id(url)
1040
1041         # Get video webpage
1042         url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
1043         video_webpage = self._download_webpage(url, video_id)
1044
1045         # Attempt to extract SWF player URL
1046         mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
1047         if mobj is not None:
1048             player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1049         else:
1050             player_url = None
1051
1052         # Get video info
1053         self.report_video_info_webpage_download(video_id)
1054         if re.search(r'player-age-gate-content">', video_webpage) is not None:
1055             self.report_age_confirmation()
1056             age_gate = True
1057             # We simulate the access to the video from www.youtube.com/v/{video_id}
1058             # this can be viewed without login into Youtube
1059             data = compat_urllib_parse.urlencode({'video_id': video_id,
1060                                                   'el': 'player_embedded',
1061                                                   'gl': 'US',
1062                                                   'hl': 'en',
1063                                                   'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1064                                                   'asv': 3,
1065                                                   'sts':'1588',
1066                                                   })
1067             video_info_url = proto + '://www.youtube.com/get_video_info?' + data
1068             video_info_webpage = self._download_webpage(video_info_url, video_id,
1069                                     note=False,
1070                                     errnote='unable to download video info webpage')
1071             video_info = compat_parse_qs(video_info_webpage)
1072         else:
1073             age_gate = False
1074             for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
1075                 video_info_url = (proto + '://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
1076                         % (video_id, el_type))
1077                 video_info_webpage = self._download_webpage(video_info_url, video_id,
1078                                         note=False,
1079                                         errnote='unable to download video info webpage')
1080                 video_info = compat_parse_qs(video_info_webpage)
1081                 if 'token' in video_info:
1082                     break
1083         if 'token' not in video_info:
1084             if 'reason' in video_info:
1085                 raise ExtractorError(
1086                     u'YouTube said: %s' % video_info['reason'][0],
1087                     expected=True, video_id=video_id)
1088             else:
1089                 raise ExtractorError(
1090                     u'"token" parameter not in video info for unknown reason',
1091                     video_id=video_id)
1092
1093         if 'view_count' in video_info:
1094             view_count = int(video_info['view_count'][0])
1095         else:
1096             view_count = None
1097
1098         # Check for "rental" videos
1099         if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
1100             raise ExtractorError(u'"rental" videos not supported')
1101
1102         # Start extracting information
1103         self.report_information_extraction(video_id)
1104
1105         # uploader
1106         if 'author' not in video_info:
1107             raise ExtractorError(u'Unable to extract uploader name')
1108         video_uploader = compat_urllib_parse.unquote_plus(video_info['author'][0])
1109
1110         # uploader_id
1111         video_uploader_id = None
1112         mobj = re.search(r'<link itemprop="url" href="http://www.youtube.com/(?:user|channel)/([^"]+)">', video_webpage)
1113         if mobj is not None:
1114             video_uploader_id = mobj.group(1)
1115         else:
1116             self._downloader.report_warning(u'unable to extract uploader nickname')
1117
1118         # title
1119         if 'title' in video_info:
1120             video_title = video_info['title'][0]
1121         else:
1122             self._downloader.report_warning(u'Unable to extract video title')
1123             video_title = u'_'
1124
1125         # thumbnail image
1126         # We try first to get a high quality image:
1127         m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
1128                             video_webpage, re.DOTALL)
1129         if m_thumb is not None:
1130             video_thumbnail = m_thumb.group(1)
1131         elif 'thumbnail_url' not in video_info:
1132             self._downloader.report_warning(u'unable to extract video thumbnail')
1133             video_thumbnail = None
1134         else:   # don't panic if we can't find it
1135             video_thumbnail = compat_urllib_parse.unquote_plus(video_info['thumbnail_url'][0])
1136
1137         # upload date
1138         upload_date = None
1139         mobj = re.search(r'id="eow-date.*?>(.*?)</span>', video_webpage, re.DOTALL)
1140         if mobj is not None:
1141             upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
1142             upload_date = unified_strdate(upload_date)
1143
1144
1145         video_categories = []
1146         # categories
1147         m_cat_container = get_element_by_id("eow-category", video_webpage)
1148         if m_cat_container:
1149             video_categories = re.findall(r'<a[^<]+>(.*?)</a>',
1150                                 m_cat_container, re.DOTALL)
1151
1152         # description
1153         video_description = get_element_by_id("eow-description", video_webpage)
1154         if video_description:
1155             video_description = re.sub(r'''(?x)
1156                 <a\s+
1157                     (?:[a-zA-Z-]+="[^"]+"\s+)*?
1158                     title="([^"]+)"\s+
1159                     (?:[a-zA-Z-]+="[^"]+"\s+)*?
1160                     class="yt-uix-redirect-link"\s*>
1161                 [^<]+
1162                 </a>
1163             ''', r'\1', video_description)
1164             video_description = clean_html(video_description)
1165         else:
1166             fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
1167             if fd_mobj:
1168                 video_description = unescapeHTML(fd_mobj.group(1))
1169             else:
1170                 video_description = u''
1171
1172         def _extract_count(klass):
1173             count = self._search_regex(
1174                 r'class="%s">([\d,]+)</span>' % re.escape(klass),
1175                 video_webpage, klass, default=None)
1176             if count is not None:
1177                 return int(count.replace(',', ''))
1178             return None
1179         like_count = _extract_count(u'likes-count')
1180         dislike_count = _extract_count(u'dislikes-count')
1181
1182         # subtitles
1183         video_subtitles = self.extract_subtitles(video_id, video_webpage)
1184
1185         if self._downloader.params.get('listsubtitles', False):
1186             self._list_available_subtitles(video_id, video_webpage)
1187             return
1188
1189         if 'length_seconds' not in video_info:
1190             self._downloader.report_warning(u'unable to extract video duration')
1191             video_duration = None
1192         else:
1193             video_duration = int(compat_urllib_parse.unquote_plus(video_info['length_seconds'][0]))
1194
1195         # annotations
1196         video_annotations = None
1197         if self._downloader.params.get('writeannotations', False):
1198                 video_annotations = self._extract_annotations(video_id)
1199
1200         # Decide which formats to download
1201         try:
1202             mobj = re.search(r';ytplayer\.config\s*=\s*({.*?});', video_webpage)
1203             if not mobj:
1204                 raise ValueError('Could not find vevo ID')
1205             json_code = uppercase_escape(mobj.group(1))
1206             ytplayer_config = json.loads(json_code)
1207             args = ytplayer_config['args']
1208             # Easy way to know if the 's' value is in url_encoded_fmt_stream_map
1209             # this signatures are encrypted
1210             if 'url_encoded_fmt_stream_map' not in args:
1211                 raise ValueError(u'No stream_map present')  # caught below
1212             re_signature = re.compile(r'[&,]s=')
1213             m_s = re_signature.search(args['url_encoded_fmt_stream_map'])
1214             if m_s is not None:
1215                 self.to_screen(u'%s: Encrypted signatures detected.' % video_id)
1216                 video_info['url_encoded_fmt_stream_map'] = [args['url_encoded_fmt_stream_map']]
1217             m_s = re_signature.search(args.get('adaptive_fmts', u''))
1218             if m_s is not None:
1219                 if 'adaptive_fmts' in video_info:
1220                     video_info['adaptive_fmts'][0] += ',' + args['adaptive_fmts']
1221                 else:
1222                     video_info['adaptive_fmts'] = [args['adaptive_fmts']]
1223         except ValueError:
1224             pass
1225
1226         def _map_to_format_list(urlmap):
1227             formats = []
1228             for itag, video_real_url in urlmap.items():
1229                 dct = {
1230                     'format_id': itag,
1231                     'url': video_real_url,
1232                     'player_url': player_url,
1233                 }
1234                 if itag in self._formats:
1235                     dct.update(self._formats[itag])
1236                 formats.append(dct)
1237             return formats
1238
1239         if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1240             self.report_rtmp_download()
1241             formats = [{
1242                 'format_id': '_rtmp',
1243                 'protocol': 'rtmp',
1244                 'url': video_info['conn'][0],
1245                 'player_url': player_url,
1246             }]
1247         elif len(video_info.get('url_encoded_fmt_stream_map', [])) >= 1 or len(video_info.get('adaptive_fmts', [])) >= 1:
1248             encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts',[''])[0]
1249             if 'rtmpe%3Dyes' in encoded_url_map:
1250                 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
1251             url_map = {}
1252             for url_data_str in encoded_url_map.split(','):
1253                 url_data = compat_parse_qs(url_data_str)
1254                 if 'itag' in url_data and 'url' in url_data:
1255                     url = url_data['url'][0]
1256                     if 'sig' in url_data:
1257                         url += '&signature=' + url_data['sig'][0]
1258                     elif 's' in url_data:
1259                         encrypted_sig = url_data['s'][0]
1260                         if self._downloader.params.get('verbose'):
1261                             if age_gate:
1262                                 if player_url is None:
1263                                     player_version = 'unknown'
1264                                 else:
1265                                     player_version = self._search_regex(
1266                                         r'-(.+)\.swf$', player_url,
1267                                         u'flash player', fatal=False)
1268                                 player_desc = 'flash player %s' % player_version
1269                             else:
1270                                 player_version = self._search_regex(
1271                                     r'html5player-(.+?)\.js', video_webpage,
1272                                     'html5 player', fatal=False)
1273                                 player_desc = u'html5 player %s' % player_version
1274
1275                             parts_sizes = u'.'.join(compat_str(len(part)) for part in encrypted_sig.split('.'))
1276                             self.to_screen(u'encrypted signature length %d (%s), itag %s, %s' %
1277                                 (len(encrypted_sig), parts_sizes, url_data['itag'][0], player_desc))
1278
1279                         if not age_gate:
1280                             jsplayer_url_json = self._search_regex(
1281                                 r'"assets":.+?"js":\s*("[^"]+")',
1282                                 video_webpage, u'JS player URL')
1283                             player_url = json.loads(jsplayer_url_json)
1284
1285                         signature = self._decrypt_signature(
1286                             encrypted_sig, video_id, player_url, age_gate)
1287                         url += '&signature=' + signature
1288                     if 'ratebypass' not in url:
1289                         url += '&ratebypass=yes'
1290                     url_map[url_data['itag'][0]] = url
1291             formats = _map_to_format_list(url_map)
1292         elif video_info.get('hlsvp'):
1293             manifest_url = video_info['hlsvp'][0]
1294             url_map = self._extract_from_m3u8(manifest_url, video_id)
1295             formats = _map_to_format_list(url_map)
1296         else:
1297             raise ExtractorError(u'no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
1298
1299         # Look for the DASH manifest
1300         if (self._downloader.params.get('youtube_include_dash_manifest', False)):
1301             try:
1302                 # The DASH manifest used needs to be the one from the original video_webpage.
1303                 # The one found in get_video_info seems to be using different signatures.
1304                 # However, in the case of an age restriction there won't be any embedded dashmpd in the video_webpage.
1305                 # Luckily, it seems, this case uses some kind of default signature (len == 86), so the
1306                 # combination of get_video_info and the _static_decrypt_signature() decryption fallback will work here.
1307                 if age_gate:
1308                     dash_manifest_url = video_info.get('dashmpd')[0]
1309                 else:
1310                     dash_manifest_url = ytplayer_config['args']['dashmpd']
1311                 def decrypt_sig(mobj):
1312                     s = mobj.group(1)
1313                     dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
1314                     return '/signature/%s' % dec_s
1315                 dash_manifest_url = re.sub(r'/s/([\w\.]+)', decrypt_sig, dash_manifest_url)
1316                 dash_doc = self._download_xml(
1317                     dash_manifest_url, video_id,
1318                     note=u'Downloading DASH manifest',
1319                     errnote=u'Could not download DASH manifest')
1320                 for r in dash_doc.findall(u'.//{urn:mpeg:DASH:schema:MPD:2011}Representation'):
1321                     url_el = r.find('{urn:mpeg:DASH:schema:MPD:2011}BaseURL')
1322                     if url_el is None:
1323                         continue
1324                     format_id = r.attrib['id']
1325                     video_url = url_el.text
1326                     filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength'))
1327                     f = {
1328                         'format_id': format_id,
1329                         'url': video_url,
1330                         'width': int_or_none(r.attrib.get('width')),
1331                         'tbr': int_or_none(r.attrib.get('bandwidth'), 1000),
1332                         'asr': int_or_none(r.attrib.get('audioSamplingRate')),
1333                         'filesize': filesize,
1334                     }
1335                     try:
1336                         existing_format = next(
1337                             fo for fo in formats
1338                             if fo['format_id'] == format_id)
1339                     except StopIteration:
1340                         f.update(self._formats.get(format_id, {}))
1341                         formats.append(f)
1342                     else:
1343                         existing_format.update(f)
1344
1345             except (ExtractorError, KeyError) as e:
1346                 self.report_warning(u'Skipping DASH manifest: %s' % e, video_id)
1347
1348         self._sort_formats(formats)
1349
1350         return {
1351             'id':           video_id,
1352             'uploader':     video_uploader,
1353             'uploader_id':  video_uploader_id,
1354             'upload_date':  upload_date,
1355             'title':        video_title,
1356             'thumbnail':    video_thumbnail,
1357             'description':  video_description,
1358             'categories':   video_categories,
1359             'subtitles':    video_subtitles,
1360             'duration':     video_duration,
1361             'age_limit':    18 if age_gate else 0,
1362             'annotations':  video_annotations,
1363             'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
1364             'view_count':   view_count,
1365             'like_count': like_count,
1366             'dislike_count': dislike_count,
1367             'formats':      formats,
1368         }
1369
class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
    IE_DESC = u'YouTube.com playlists'
    _VALID_URL = r"""(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        youtube\.com/
                        (?:
                           (?:course|view_play_list|my_playlists|artist|playlist|watch)
                           \? (?:.*?&)*? (?:p|a|list)=
                        |  p/
                        )
                        (
                            (?:PL|EC|UU|FL|RD)?[0-9A-Za-z-_]{10,}
                            # Top tracks, they can also include dots 
                            |(?:MC)[\w\.]*
                        )
                        .*
                     |
                        ((?:PL|EC|UU|FL|RD)[0-9A-Za-z-_]{10,})
                     )"""
    _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
    # Presence of this marker in the "load more" widget means another page exists.
    _MORE_PAGES_INDICATOR = r'data-link-type="next"'
    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&amp;[^"]*?index=(?P<index>\d+)'
    IE_NAME = u'youtube:playlist'

    def _real_initialize(self):
        # Playlists can be private; log in if credentials were supplied.
        self._login()

    def _ids_to_results(self, ids):
        """Wrap each video id in a url_result to be handled by the Youtube IE."""
        return [self.url_result(vid_id, 'Youtube', video_id=vid_id)
                       for vid_id in ids]

    def _extract_mix(self, playlist_id):
        """Extract a YouTube mix (auto-generated playlist).

        The mixes are generated from a single video; the id of the playlist
        is just 'RD' + video_id, so the watch page of that video lists the
        whole mix.
        """
        url = 'https://youtube.com/watch?v=%s&list=%s' % (playlist_id[-11:], playlist_id)
        webpage = self._download_webpage(url, playlist_id, u'Downloading Youtube mix')
        search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage)
        # The class of the title element differs between page variants; try
        # the known ones from most to least specific.
        title_span = (search_title('playlist-title') or
            search_title('title long-title') or search_title('title'))
        title = clean_html(title_span)
        video_re = r'''(?x)data-video-username="(.*?)".*?
                       href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id)
        matches = orderedSet(re.findall(video_re, webpage, flags=re.DOTALL))
        # Some of the videos may have been deleted, their username field is empty
        ids = [video_id for (username, video_id) in matches if username]
        url_results = self._ids_to_results(ids)

        return self.playlist_result(url_results, playlist_id, title)

    def _real_extract(self, url):
        """Collect all video ids of a playlist and return a playlist result."""
        # Extract playlist id
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        playlist_id = mobj.group(1) or mobj.group(2)

        # Check if it's a video-specific URL
        query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
        if 'v' in query_dict:
            video_id = query_dict['v'][0]
            if self._downloader.params.get('noplaylist'):
                self.to_screen(u'Downloading just video %s because of --no-playlist' % video_id)
                return self.url_result(video_id, 'Youtube', video_id=video_id)
            else:
                self.to_screen(u'Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))

        if playlist_id.startswith('RD'):
            # Mixes require a custom extraction process
            return self._extract_mix(playlist_id)
        if playlist_id.startswith('TL'):
            raise ExtractorError(u'For downloading YouTube.com top lists, use '
                u'the "yttoplist" keyword, for example "youtube-dl \'yttoplist:music:Top Tracks\'"', expected=True)

        url = self._TEMPLATE_URL % playlist_id
        page = self._download_webpage(url, playlist_id)
        # The first playlist page doubles as the first content page and the
        # first "load more" widget source.
        more_widget_html = content_html = page

        # Check if the playlist exists or is private
        if re.search(r'<div class="yt-alert-message">[^<]*?(The|This) playlist (does not exist|is private)[^<]*?</div>', page) is not None:
            raise ExtractorError(
                u'The playlist doesn\'t exist or is private, use --username or '
                '--netrc to access it.',
                expected=True)

        # Extract the video ids from the playlist pages
        ids = []

        for page_num in itertools.count(1):
            matches = re.finditer(self._VIDEO_RE, content_html)
            # We remove the duplicates and the link with index 0
            # (it's not the first video of the playlist)
            new_ids = orderedSet(m.group('id') for m in matches if m.group('index') != '0')
            ids.extend(new_ids)

            # The "load more" widget carries the path of the next ajax page.
            mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
            if not mobj:
                break

            more = self._download_json(
                'https://youtube.com/%s' % mobj.group('more'), playlist_id,
                'Downloading page #%s' % page_num,
                transform_source=uppercase_escape)
            content_html = more['content_html']
            more_widget_html = more['load_more_widget_html']

        playlist_title = self._html_search_regex(
            r'(?s)<h1 class="pl-header-title[^"]*">\s*(.*?)\s*</h1>',
            page, u'title')

        url_results = self._ids_to_results(ids)
        return self.playlist_result(url_results, playlist_id, playlist_title)
1482
1483
class YoutubeTopListIE(YoutubePlaylistIE):
    IE_NAME = u'youtube:toplist'
    IE_DESC = (u'YouTube.com top lists, "yttoplist:{channel}:{list title}"'
        u' (Example: "yttoplist:music:Top Tracks")')
    _VALID_URL = r'yttoplist:(?P<chann>.*?):(?P<title>.*?)$'
    # The page sometimes comes back without the video list; retry at most this
    # many additional times instead of looping forever.
    _MAX_RETRIES = 5

    def _real_extract(self, url):
        """Resolve a yttoplist:{channel}:{title} pseudo-URL to a playlist result."""
        mobj = re.match(self._VALID_URL, url)
        channel = mobj.group('chann')
        title = mobj.group('title')
        # Locate the playlist link on the channel page via the url-encoded title.
        query = compat_urllib_parse.urlencode({'title': title})
        playlist_re = 'href="([^"]+?%s.*?)"' % re.escape(query)
        channel_page = self._download_webpage('https://www.youtube.com/%s' % channel, title)
        link = self._html_search_regex(playlist_re, channel_page, u'list')
        url = compat_urlparse.urljoin('https://www.youtube.com/', link)

        video_re = r'data-index="\d+".*?data-video-id="([0-9A-Za-z_-]{11})"'
        ids = []
        # sometimes the webpage doesn't contain the videos
        # retry until we get them, but only a bounded number of times
        # (the previous implementation could loop indefinitely)
        for i in range(self._MAX_RETRIES + 1):
            msg = u'Downloading Youtube mix'
            if i > 0:
                msg += ', retry #%d' % i
            webpage = self._download_webpage(url, title, msg)
            ids = orderedSet(re.findall(video_re, webpage))
            if ids:
                break
        else:
            # Exhausted all retries without ever seeing a video list.
            raise ExtractorError(u'Unable to extract video ids from the top list page')
        url_results = self._ids_to_results(ids)
        return self.playlist_result(url_results, playlist_title=title)
1514
1515
class YoutubeChannelIE(InfoExtractor):
    IE_DESC = u'YouTube.com channels'
    _VALID_URL = r"^(?:https?://)?(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/([0-9A-Za-z_-]+)"
    _MORE_PAGES_INDICATOR = 'yt-uix-load-more'
    _MORE_PAGES_URL = 'https://www.youtube.com/c4_browse_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s'
    IE_NAME = u'youtube:channel'

    def extract_videos_from_page(self, page):
        """Return the video ids linked from *page*, deduplicated while
        keeping the first-occurrence order."""
        seen = set()
        ordered_ids = []
        for match in re.finditer(r'href="/watch\?v=([0-9A-Za-z_-]+)&?', page):
            video_id = match.group(1)
            if video_id not in seen:
                seen.add(video_id)
                ordered_ids.append(video_id)
        return ordered_ids

    def _real_extract(self, url):
        """Extract every video of a channel as a playlist result."""
        match = re.match(self._VALID_URL, url)
        if match is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        channel_id = match.group(1)

        # The /videos page reveals whether the channel is auto-generated.
        url = 'https://www.youtube.com/channel/%s/videos' % channel_id
        channel_page = self._download_webpage(url, channel_id)
        autogenerated = re.search(r'''(?x)
                class="[^"]*?(?:
                    channel-header-autogenerated-label|
                    yt-channel-title-autogenerated
                )[^"]*"''', channel_page) is not None

        if autogenerated:
            # Auto-generated channels list all videos on a single page; their
            # ajax pages come back empty, so scrape the page we already have.
            video_ids = self.extract_videos_from_page(channel_page)
        else:
            # Page through the json-based channel_ajax query until the
            # "load more" widget disappears.
            video_ids = []
            for pagenum in itertools.count(1):
                page = self._download_json(
                    self._MORE_PAGES_URL % (pagenum, channel_id), channel_id,
                    note=u'Downloading page #%s' % pagenum,
                    transform_source=uppercase_escape)

                video_ids.extend(
                    self.extract_videos_from_page(page['content_html']))

                if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']:
                    break

        self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))

        entries = [self.url_result(vid, 'Youtube', video_id=vid)
                   for vid in video_ids]
        return self.playlist_result(entries, channel_id)
1570
1571
class YoutubeUserIE(InfoExtractor):
    IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)([A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/users/%s'
    # The GData API returns at most this many entries per request.
    _GDATA_PAGE_SIZE = 50
    _GDATA_URL = 'https://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'
    IE_NAME = u'youtube:user'

    @classmethod
    def suitable(cls, url):
        # Don't return True if the url can be extracted with another youtube
        # extractor: this regex is too permissive and would match their URLs
        # as well, so every sibling *IE class gets the first chance.
        other_ies = iter(klass for (name, klass) in globals().items() if name.endswith('IE') and klass is not cls)
        if any(ie.suitable(url) for ie in other_ies): return False
        else: return super(YoutubeUserIE, cls).suitable(url)

    def _real_extract(self, url):
        """Return a lazily-paged playlist of all uploads of a user."""
        # Extract username
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        username = mobj.group(1)

        # Download video ids using YouTube Data API. Result size per
        # query is limited (currently to 50 videos) so we need to query
        # page by page until there are no video ids - it means we got
        # all of them.

        def download_page(pagenum):
            # Generator yielding the url results of one API page; consumed
            # lazily through PagedList below.
            # GData's start-index parameter is 1-based.
            start_index = pagenum * self._GDATA_PAGE_SIZE + 1

            gdata_url = self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index)
            page = self._download_webpage(
                gdata_url, username,
                u'Downloading video ids from %d to %d' % (
                    start_index, start_index + self._GDATA_PAGE_SIZE))

            try:
                response = json.loads(page)
            except ValueError as err:
                raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))
            if 'entry' not in response['feed']:
                # A feed without entries means we are past the last page.
                return

            # Extract video identifiers
            entries = response['feed']['entry']
            for entry in entries:
                title = entry['title']['$t']
                video_id = entry['id']['$t'].split('/')[-1]
                yield {
                    '_type': 'url',
                    'url': video_id,
                    'ie_key': 'Youtube',
                    'id': video_id,
                    'title': title,
                }
        url_results = PagedList(download_page, self._GDATA_PAGE_SIZE)

        return self.playlist_result(url_results, playlist_title=username)
1632
1633
class YoutubeSearchIE(SearchInfoExtractor):
    IE_DESC = u'YouTube.com searches'
    # GData search API, jsonc format; start-index is 1-based.
    _API_URL = u'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc'
    _MAX_RESULTS = 1000
    IE_NAME = u'youtube:search'
    _SEARCH_KEY = 'ytsearch'

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""

        video_ids = []
        pagenum = 0
        # 'limit' starts at the requested count and may shrink once the API
        # reports how many results actually exist (totalItems).
        limit = n
        PAGE_SIZE = 50

        while (PAGE_SIZE * pagenum) < limit:
            result_url = self._API_URL % (
                compat_urllib_parse.quote_plus(query.encode('utf-8')),
                (PAGE_SIZE * pagenum) + 1)
            data_json = self._download_webpage(
                result_url, video_id=u'query "%s"' % query,
                note=u'Downloading page %s' % (pagenum + 1),
                errnote=u'Unable to download API page')
            data = json.loads(data_json)
            api_response = data['data']

            if 'items' not in api_response:
                raise ExtractorError(
                    u'[youtube] No video results', expected=True)

            new_ids = list(video['id'] for video in api_response['items'])
            video_ids += new_ids

            # Never request beyond what the API says is available.
            limit = min(n, api_response['totalItems'])
            pagenum += 1

        # The last page may overshoot the requested count; trim the excess.
        if len(video_ids) > n:
            video_ids = video_ids[:n]
        videos = [self.url_result(video_id, 'Youtube', video_id=video_id)
                  for video_id in video_ids]
        return self.playlist_result(videos, query)
1675
1676
class YoutubeSearchDateIE(YoutubeSearchIE):
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    # Same API as the parent class, with results ordered by publish date.
    _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc&orderby=published'
    _SEARCH_KEY = 'ytsearchdate'
    IE_DESC = u'YouTube.com searches, newest videos first'
1682
1683
class YoutubeSearchURLIE(InfoExtractor):
    IE_DESC = u'YouTube.com search URLs'
    IE_NAME = u'youtube:search_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?search_query=(?P<query>[^&]+)(?:[&]|$)'

    def _real_extract(self, url):
        """Turn a results-page URL into a playlist of the videos it lists."""
        query = compat_urllib_parse.unquote_plus(
            re.match(self._VALID_URL, url).group('query'))

        webpage = self._download_webpage(url, query)
        result_code = self._search_regex(
            r'(?s)<ol id="search-results"(.*?)</ol>', webpage, u'result HTML')

        entries = []
        # Each result is wrapped in its own lockup-title heading.
        for snippet in re.findall(
                r'(?s)<h3 class="yt-lockup-title">(.*?)</h3>', result_code):
            title = self._html_search_regex(
                r'(?s)title="([^"]+)"', snippet, 'item title', fatal=False)
            href = self._html_search_regex(
                r'(?s)href="([^"]+)"', snippet, 'item URL')
            entries.append({
                '_type': 'url',
                'url': compat_urlparse.urljoin('https://www.youtube.com/', href),
                'title': title,
            })

        return {
            '_type': 'playlist',
            'entries': entries,
            'title': query,
        }
1718
1719
class YoutubeShowIE(InfoExtractor):
    IE_DESC = u'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://www\.youtube\.com/show/(.*)'
    IE_NAME = u'youtube:show'

    def _real_extract(self, url):
        """Return one playlist url_result per season of the show."""
        show_name = re.match(self._VALID_URL, url).group(1)
        webpage = self._download_webpage(url, show_name, u'Downloading show webpage')
        # Every season of the show is published as a separate playlist.
        seasons = list(re.finditer(r'href="(/playlist\?list=.*?)"', webpage))
        self.to_screen(u'%s: Found %s seasons' % (show_name, len(seasons)))
        return [
            self.url_result('https://www.youtube.com' + season.group(1), 'YoutubePlaylist')
            for season in seasons
        ]
1733
1734
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Base class for extractors that fetch info from
    http://www.youtube.com/feed_ajax
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
    """
    _LOGIN_REQUIRED = True
    # use action_load_personal_feed instead of action_load_system_feed
    _PERSONAL_FEED = False

    @property
    def _FEED_TEMPLATE(self):
        # URL template with a single remaining %s placeholder for the paging
        # token (note the escaped %%s).
        action = 'action_load_system_feed'
        if self._PERSONAL_FEED:
            action = 'action_load_personal_feed'
        return 'https://www.youtube.com/feed_ajax?%s=1&feed_name=%s&paging=%%s' % (action, self._FEED_NAME)

    @property
    def IE_NAME(self):
        # Derived from the subclass' feed name, e.g. u'youtube:subscriptions'.
        return u'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        # Feeds are account-specific; _LOGIN_REQUIRED makes missing
        # credentials a hard error in _login().
        self._login()

    def _real_extract(self, url):
        """Page through the feed and return all linked videos as a playlist."""
        feed_entries = []
        paging = 0
        for i in itertools.count(1):
            info = self._download_json(self._FEED_TEMPLATE % paging,
                                          u'%s feed' % self._FEED_NAME,
                                          u'Downloading page %s' % i)
            # The HTML payload key differs between feed variants.
            feed_html = info.get('feed_html') or info.get('content_html')
            m_ids = re.finditer(r'"/watch\?v=(.*?)["&]', feed_html)
            ids = orderedSet(m.group(1) for m in m_ids)
            feed_entries.extend(
                self.url_result(video_id, 'Youtube', video_id=video_id)
                for video_id in ids)
            # 'paging' carries the token of the next page; null means this
            # was the last one.
            if info['paging'] is None:
                break
            paging = info['paging']
        return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE)
1776
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    IE_DESC = u'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
    # Feed name used by the base class to build the feed_ajax URL.
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = u'Youtube Subscriptions'
1782
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    IE_DESC = u'YouTube.com recommended videos, "ytrec" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?'
    # Feed name used by the base class to build the feed_ajax URL.
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = u'Youtube Recommended videos'
1788
class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor):
    IE_DESC = u'Youtube watch later list, "ytwatchlater" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/watch_later|:ytwatchlater'
    # Feed name used by the base class to build the feed_ajax URL.
    _FEED_NAME = 'watch_later'
    _PLAYLIST_TITLE = u'Youtube Watch Later'
    # Watch-later is tied to the account, so use the personal feed action.
    _PERSONAL_FEED = True
1795
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    IE_DESC = u'Youtube watch history, "ythistory" keyword (requires authentication)'
    # Use a raw string like every sibling class: the previous u'...\.'
    # literal relied on '\.' not being a recognized escape sequence (a
    # DeprecationWarning in modern Python). The pattern value is unchanged.
    _VALID_URL = r'https?://www\.youtube\.com/feed/history|:ythistory'
    # Feed name used by the base class to build the feed_ajax URL.
    _FEED_NAME = 'history'
    # Watch history is tied to the account, so use the personal feed action.
    _PERSONAL_FEED = True
    _PLAYLIST_TITLE = u'Youtube Watch History'
1802
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    IE_NAME = u'youtube:favorites'
    IE_DESC = u'YouTube.com favourite videos, "ytfav" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
    _LOGIN_REQUIRED = True

    def _real_extract(self, url):
        """Find the favourites playlist id and delegate to YoutubePlaylistIE."""
        page = self._download_webpage('https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        favourites_id = self._search_regex(r'list=(.+?)["&]', page, u'favourites playlist id')
        return self.url_result(favourites_id, 'YoutubePlaylist')
1813
1814
class YoutubeTruncatedURLIE(InfoExtractor):
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?[^/]+/watch\?(?:feature=[a-z_]+)?$|
        (?:https?://)?(?:www\.)?youtube\.com/attribution_link\?a=[^&]+$
    '''

    def _real_extract(self, url):
        """Always fail with a hint: the '&v=...' part was eaten by the shell."""
        message = (
            u'Did you forget to quote the URL? Remember that & is a meta '
            u'character in most shells, so you want to put the URL in quotes, '
            u'like  youtube-dl '
            u'"http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            u' or simply  youtube-dl BaW_jenozKc  .')
        raise ExtractorError(message, expected=True)