3 from __future__ import unicode_literals
14 from .common import InfoExtractor, SearchInfoExtractor
15 from ..jsinterp import JSInterpreter
16 from ..swfinterp import SWFInterpreter
17 from ..compat import (
21 compat_urllib_parse_unquote,
22 compat_urllib_parse_unquote_plus,
23 compat_urllib_parse_urlparse,
33 get_element_by_attribute,
52 class YoutubeBaseInfoExtractor(InfoExtractor):
53 """Provide base functions for Youtube extractors"""
54 _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
55 _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'
56 _NETRC_MACHINE = 'youtube'
57 # If True it will raise an error if no login info is provided
58 _LOGIN_REQUIRED = False
60 def _set_language(self):
62 '.youtube.com', 'PREF', 'f1=50000000&hl=en',
63 # YouTube sets the expire time to about two months
64 expire_time=time.time() + 2 * 30 * 24 * 3600)
66 def _ids_to_results(self, ids):
68 self.url_result(vid_id, 'Youtube', video_id=vid_id)
73 Attempt to log in to YouTube.
74 True is returned if successful or skipped.
75 False is returned if login failed.
77 If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
79 (username, password) = self._get_login_info()
80 # No authentication to be performed
82 if self._LOGIN_REQUIRED:
83 raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
86 login_page = self._download_webpage(
87 self._LOGIN_URL, None,
88 note='Downloading login page',
89 errnote='unable to fetch login page', fatal=False)
90 if login_page is False:
93 galx = self._search_regex(r'(?s)<input.+?name="GALX".+?value="(.+?)"',
94 login_page, 'Login GALX parameter')
98 'continue': 'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
103 'PersistentCookie': 'yes',
105 'bgresponse': 'js_disabled',
106 'checkConnection': '',
107 'checkedDomains': 'youtube',
114 'service': 'youtube',
119 login_data = compat_urllib_parse.urlencode(encode_dict(login_form_strs)).encode('ascii')
121 req = sanitized_Request(self._LOGIN_URL, login_data)
122 login_results = self._download_webpage(
124 note='Logging in', errnote='unable to log in', fatal=False)
125 if login_results is False:
128 if re.search(r'id="errormsg_0_Passwd"', login_results) is not None:
129 raise ExtractorError('Please use your account password and a two-factor code instead of an application-specific password.', expected=True)
132 # TODO add SMS and phone call support - these require making a request and then prompting the user
134 if re.search(r'(?i)<form[^>]* id="challenge"', login_results) is not None:
135 tfa_code = self._get_tfa_info('2-step verification code')
138 self._downloader.report_warning(
139 'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
140 '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
143 tfa_code = remove_start(tfa_code, 'G-')
145 tfa_form_strs = self._form_hidden_inputs('challenge', login_results)
147 tfa_form_strs.update({
152 tfa_data = compat_urllib_parse.urlencode(encode_dict(tfa_form_strs)).encode('ascii')
154 tfa_req = sanitized_Request(self._TWOFACTOR_URL, tfa_data)
155 tfa_results = self._download_webpage(
157 note='Submitting TFA code', errnote='unable to submit tfa', fatal=False)
159 if tfa_results is False:
162 if re.search(r'(?i)<form[^>]* id="challenge"', tfa_results) is not None:
163 self._downloader.report_warning('Two-factor code expired or invalid. Please try again, or use a one-use backup code instead.')
165 if re.search(r'(?i)<form[^>]* id="gaia_loginform"', tfa_results) is not None:
166 self._downloader.report_warning('unable to log in - did the page structure change?')
168 if re.search(r'smsauth-interstitial-reviewsettings', tfa_results) is not None:
169 self._downloader.report_warning('Your Google account has a security notice. Please log in on your web browser, resolve the notice, and try again.')
172 if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
173 self._downloader.report_warning('unable to log in: bad username or password')
177 def _real_initialize(self):
178 if self._downloader is None:
181 if not self._login():
185 class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
186 # Extract entries from page with "Load more" button
187 def _entries(self, page, playlist_id):
188 more_widget_html = content_html = page
189 for page_num in itertools.count(1):
190 for entry in self._process_page(content_html):
193 mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
197 more = self._download_json(
198 'https://youtube.com/%s' % mobj.group('more'), playlist_id,
199 'Downloading page #%s' % page_num,
200 transform_source=uppercase_escape)
201 content_html = more['content_html']
202 if not content_html.strip():
203 # Some webpages show a "Load more" button but they don't
206 more_widget_html = more['load_more_widget_html']
209 class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
210 def _process_page(self, content):
211 for video_id, video_title in self.extract_videos_from_page(content):
212 yield self.url_result(video_id, 'Youtube', video_id, video_title)
214 def extract_videos_from_page(self, page):
217 for mobj in re.finditer(self._VIDEO_RE, page):
218 # The link with index 0 is not the first video of the playlist (not sure if still actual)
219 if 'index' in mobj.groupdict() and mobj.group('id') == '0':
221 video_id = mobj.group('id')
222 video_title = unescapeHTML(mobj.group('title'))
224 video_title = video_title.strip()
226 idx = ids_in_page.index(video_id)
227 if video_title and not titles_in_page[idx]:
228 titles_in_page[idx] = video_title
230 ids_in_page.append(video_id)
231 titles_in_page.append(video_title)
232 return zip(ids_in_page, titles_in_page)
class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
    """Base extractor for pages whose entries are links to playlists."""

    def _process_page(self, content):
        """Yield a 'YoutubePlaylist' url_result per distinct playlist link.

        Playlist ids are collected from href="/playlist?list=..." attributes
        in content and de-duplicated with orderedSet() before being turned
        into full playlist URLs.
        """
        found_ids = re.findall(
            r'href="/?playlist\?list=([0-9A-Za-z-_]{10,})"', content)
        for playlist_id in orderedSet(found_ids):
            playlist_url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
            yield self.url_result(playlist_url, 'YoutubePlaylist')

    def _real_extract(self, url):
        """Download url and return its entries as a playlist result.

        The id comes from _match_id(url); the title is the page's Open Graph
        title, looked up non-fatally.
        """
        list_id = self._match_id(url)
        page = self._download_webpage(url, list_id)
        page_title = self._og_search_title(page, fatal=False)
        entries = self._entries(page, list_id)
        return self.playlist_result(entries, list_id, page_title)
248 class YoutubeIE(YoutubeBaseInfoExtractor):
249 IE_DESC = 'YouTube.com'
250 _VALID_URL = r"""(?x)^
252 (?:https?://|//) # http(s):// or protocol-independent URL
253 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|
254 (?:www\.)?deturl\.com/www\.youtube\.com/|
255 (?:www\.)?pwnyoutube\.com/|
256 (?:www\.)?yourepeat\.com/|
257 tube\.majestyc\.net/|
258 youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
259 (?:.*?\#/)? # handle anchor (#/) redirect urls
260 (?: # the various things that can precede the ID:
261 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
262 |(?: # or the v= param in all its forms
263 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
264 (?:\?|\#!?) # the params delimiter ? or # or #!
265 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&v=V36LpHqtcDY)
270 youtu\.be| # just youtu.be/xxxx
271 vid\.plus # or vid.plus/xxxx
273 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
275 )? # all until now is optional -> you can pass the naked ID
276 ([0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
277 (?!.*?&list=) # combined list/video URLs are handled by the playlist IE
278 (?(1).+)? # if we found the ID, everything can follow
280 _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
281 # tbr was extracted from com/google/youtube/model/VideoFormat.as in watch_as3.swf and converted from Bytes/S to KBits/S
283 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263', 'tbr': 320},
284 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263', 'tbr': 896},
285 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v', 'tbr': 60},
286 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v', 'tbr': 80},
287 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264', 'tbr': 736},
288 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'tbr': 3192},
289 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'tbr': 928},
290 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'tbr': 1280},
291 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
292 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v', 'tbr': 256},
293 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'tbr': 6192},
294 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'tbr': 10128},
295 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'tbr': 928},
296 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'tbr': 1280},
297 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'tbr': 3192},
298 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
299 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'tbr': 1280},
300 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'tbr': 1280},
304 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20, 'tbr': 800},
305 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20, 'tbr': 1152},
306 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20, 'tbr': 3000},
307 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20, 'tbr': 6000},
308 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
309 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
310 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
312 # Apple HTTP Live Streaming
313 '91': {'format_note': 'HLS', 'tbr': 98.4375},
314 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10, 'tbr': 186.625},
315 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10, 'tbr': 951.5625},
316 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10, 'tbr': 1312.5},
317 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10, 'tbr': 3207.421875},
318 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10, 'tbr': 6349.21875},
319 '97': {'format_note': 'HLS', 'tbr': 10128},
320 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
321 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
324 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40, 'tbr': 261.71875},
325 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40, 'tbr': 951.5625},
326 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40, 'tbr': 1312.5},
327 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40, 'tbr': 3207.421875},
328 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40, 'tbr': 6349.21875},
329 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40, 'tbr': 10128.0}, # Height can vary (https://github.com/rg3/youtube-dl/issues/4559)
330 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40, 'tbr': 91.796875},
331 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
332 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60, 'preference': -40},
333 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60, 'preference': -40},
334 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
337 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'preference': -50, 'container': 'm4a_dash', 'tbr': 32},
338 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'preference': -50, 'container': 'm4a_dash', 'tbr': 128},
339 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'preference': -50, 'container': 'm4a_dash', 'tbr': 320},
342 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
343 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
344 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
345 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
346 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
347 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
348 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9', 'preference': -40},
349 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
350 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
351 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
352 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
353 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
354 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
355 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
356 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
357 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
358 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
359 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60, 'preference': -40},
360 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60, 'preference': -40},
361 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60, 'preference': -40},
362 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
363 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60, 'preference': -40},
366 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128, 'preference': -50},
367 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256, 'preference': -50},
369 # Dash webm audio with opus inside
370 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50, 'preference': -50},
371 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70, 'preference': -50},
372 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160, 'preference': -50},
375 '_rtmp': {'protocol': 'rtmp'},
377 # formats extracted from com/google/youtube/model/VideoFormat.as in watch_as3.swf
378 '20': {'tbr': 24000},
381 '25': {'tbr': 312.5},
388 '65': {'tbr': 10128},
389 '81': {'ext': 'mp4', 'tbr': 928},
392 '119': {'ext': 'mp4', 'tbr': 320},
393 '304': {'format_note': 'DASH'},
394 '305': {'format_note': 'DASH'},
396 _SUBTITLE_FORMATS = ('ttml', 'vtt')
401 'url': 'http://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
405 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
406 'uploader': 'Philipp Hagemeister',
407 'uploader_id': 'phihag',
408 'upload_date': '20121002',
409 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
410 'categories': ['Science & Technology'],
411 'tags': ['youtube-dl'],
413 'dislike_count': int,
419 'url': 'http://www.youtube.com/watch?v=UxxajLWwzqY',
420 'note': 'Test generic use_cipher_signature video (#897)',
424 'upload_date': '20120506',
425 'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
426 'alt_title': 'I Love It (feat. Charli XCX)',
427 'description': 'md5:782e8651347686cba06e58f71ab51773',
428 'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
429 'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
430 'iconic ep', 'iconic', 'love', 'it'],
431 'uploader': 'Icona Pop',
432 'uploader_id': 'IconaPop',
433 'creator': 'Icona Pop',
437 'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ',
438 'note': 'Test VEVO video with age protection (#956)',
442 'upload_date': '20130703',
443 'title': 'Justin Timberlake - Tunnel Vision (Explicit)',
444 'alt_title': 'Tunnel Vision',
445 'description': 'md5:64249768eec3bc4276236606ea996373',
446 'uploader': 'justintimberlakeVEVO',
447 'uploader_id': 'justintimberlakeVEVO',
448 'creator': 'Justin Timberlake',
453 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
454 'note': 'Embed-only video (#1746)',
458 'upload_date': '20120608',
459 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
460 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
461 'uploader': 'SET India',
462 'uploader_id': 'setindia',
467 'url': 'http://www.youtube.com/watch?v=BaW_jenozKc&v=UxxajLWwzqY',
468 'note': 'Use the first video ID in the URL',
472 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
473 'uploader': 'Philipp Hagemeister',
474 'uploader_id': 'phihag',
475 'upload_date': '20121002',
476 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
477 'categories': ['Science & Technology'],
478 'tags': ['youtube-dl'],
480 'dislike_count': int,
483 'skip_download': True,
487 'url': 'http://www.youtube.com/watch?v=a9LDPn-MO4I',
488 'note': '256k DASH audio (format 141) via DASH manifest',
492 'upload_date': '20121002',
493 'uploader_id': '8KVIDEO',
495 'uploader': '8KVIDEO',
496 'title': 'UHDTV TEST 8K VIDEO.mp4'
499 'youtube_include_dash_manifest': True,
503 # DASH manifest with encrypted signature
505 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
509 'title': 'Afrojack, Spree Wilson - The Spark ft. Spree Wilson',
510 'description': 'md5:12e7067fa6735a77bdcbb58cb1187d2d',
511 'uploader': 'AfrojackVEVO',
512 'uploader_id': 'AfrojackVEVO',
513 'upload_date': '20131011',
516 'youtube_include_dash_manifest': True,
520 # JS player signature function name containing $
522 'url': 'https://www.youtube.com/watch?v=nfWlot6h_JM',
526 'title': 'Taylor Swift - Shake It Off',
527 'alt_title': 'Shake It Off',
528 'description': 'md5:95f66187cd7c8b2c13eb78e1223b63c3',
529 'uploader': 'TaylorSwiftVEVO',
530 'uploader_id': 'TaylorSwiftVEVO',
531 'upload_date': '20140818',
532 'creator': 'Taylor Swift',
535 'youtube_include_dash_manifest': True,
541 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
545 'upload_date': '20100909',
546 'uploader': 'The Amazing Atheist',
547 'uploader_id': 'TheAmazingAtheist',
548 'title': 'Burning Everyone\'s Koran',
549 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
552 # Normal age-gate video (No vevo, embed allowed)
554 'url': 'http://youtube.com/watch?v=HtVdAasjOgU',
558 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
559 'description': 're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
560 'uploader': 'The Witcher',
561 'uploader_id': 'WitcherGame',
562 'upload_date': '20140605',
566 # Age-gate video with encrypted signature
568 'url': 'http://www.youtube.com/watch?v=6kLq3WMV1nU',
572 'title': 'Dedication To My Ex (Miss That) (Lyric Video)',
573 'description': 'md5:33765bb339e1b47e7e72b5490139bb41',
574 'uploader': 'LloydVEVO',
575 'uploader_id': 'LloydVEVO',
576 'upload_date': '20110629',
580 # video_info is None (https://github.com/rg3/youtube-dl/issues/4421)
582 'url': '__2ABJjxzNo',
586 'upload_date': '20100430',
587 'uploader_id': 'deadmau5',
588 'creator': 'deadmau5',
589 'description': 'md5:12c56784b8032162bb936a5f76d55360',
590 'uploader': 'deadmau5',
591 'title': 'Deadmau5 - Some Chords (HD)',
592 'alt_title': 'Some Chords',
594 'expected_warnings': [
595 'DASH manifest missing',
598 # Olympics (https://github.com/rg3/youtube-dl/issues/4431)
600 'url': 'lqQg6PlCWgI',
604 'upload_date': '20150827',
605 'uploader_id': 'olympic',
606 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
607 'uploader': 'Olympics',
608 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
611 'skip_download': 'requires avconv',
616 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
620 'stretched_ratio': 16 / 9.,
621 'upload_date': '20110310',
622 'uploader_id': 'AllenMeow',
623 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
625 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
628 # url_encoded_fmt_stream_map is empty string
630 'url': 'qEJwOuvDf7I',
634 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
636 'upload_date': '20150404',
637 'uploader_id': 'spbelect',
638 'uploader': 'Наблюдатели Петербурга',
641 'skip_download': 'requires avconv',
643 'skip': 'This live event has ended.',
645 # Extraction from multiple DASH manifests (https://github.com/rg3/youtube-dl/pull/6097)
647 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
651 'title': 'md5:7b81415841e02ecd4313668cde88737a',
652 'description': 'md5:116377fd2963b81ec4ce64b542173306',
653 'upload_date': '20150625',
654 'uploader_id': 'dorappi2000',
655 'uploader': 'dorappi2000',
656 'formats': 'mincount:33',
659 # DASH manifest with segment_list
661 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
662 'md5': '8ce563a1d667b599d21064e982ab9e31',
666 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
667 'uploader': 'Airtek',
668 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
669 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
670 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
673 'youtube_include_dash_manifest': True,
674 'format': '135', # bestvideo
678 # Multifeed videos (multiple cameras), URL is for Main Camera
679 'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
682 'title': 'teamPGP: Rocket League Noob Stream',
683 'description': 'md5:dc7872fb300e143831327f1bae3af010',
689 'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
690 'description': 'md5:dc7872fb300e143831327f1bae3af010',
691 'upload_date': '20150721',
692 'uploader': 'Beer Games Beer',
693 'uploader_id': 'beergamesbeer',
699 'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
700 'description': 'md5:dc7872fb300e143831327f1bae3af010',
701 'upload_date': '20150721',
702 'uploader': 'Beer Games Beer',
703 'uploader_id': 'beergamesbeer',
709 'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
710 'description': 'md5:dc7872fb300e143831327f1bae3af010',
711 'upload_date': '20150721',
712 'uploader': 'Beer Games Beer',
713 'uploader_id': 'beergamesbeer',
719 'title': 'teamPGP: Rocket League Noob Stream (zim)',
720 'description': 'md5:dc7872fb300e143831327f1bae3af010',
721 'upload_date': '20150721',
722 'uploader': 'Beer Games Beer',
723 'uploader_id': 'beergamesbeer',
727 'skip_download': True,
731 # Multifeed video with comma in title (see https://github.com/rg3/youtube-dl/issues/8536)
732 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
735 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
740 'url': 'http://vid.plus/FlRa-iH7PGw',
741 'only_matching': True,
744 # Title with JS-like syntax "};" (see https://github.com/rg3/youtube-dl/issues/7468)
745 # Also tests cut-off URL expansion in video description (see
746 # https://github.com/rg3/youtube-dl/issues/1892,
747 # https://github.com/rg3/youtube-dl/issues/8164)
748 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
752 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
753 'alt_title': 'Dark Walk',
754 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
755 'upload_date': '20151119',
756 'uploader_id': 'IronSoulElf',
757 'uploader': 'IronSoulElf',
758 'creator': 'Todd Haberman, Daniel Law Heath & Aaron Kaplan',
761 'skip_download': True,
765 # Tags with '};' (see https://github.com/rg3/youtube-dl/issues/7468)
766 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
767 'only_matching': True,
770 # Video with yt:stretch=17:0
771 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
775 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
776 'description': 'md5:ee18a25c350637c8faff806845bddee9',
777 'upload_date': '20151107',
778 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
779 'uploader': 'CH GAMER DROID',
782 'skip_download': True,
786 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;v=V36LpHqtcDY',
787 'only_matching': True,
def __init__(self, *args, **kwargs):
    """Initialise the extractor with an empty player cache.

    All positional and keyword arguments are forwarded unchanged to the
    parent class initialiser.
    """
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # Filled by _decrypt_signature: (player_url, signature cache id) -> function.
    self._player_cache = dict()
def report_video_info_webpage_download(self, video_id):
    """Print a notice that the video info webpage of video_id is being downloaded."""
    message = '%s: Downloading video info webpage' % video_id
    self.to_screen(message)
def report_information_extraction(self, video_id):
    """Print a notice that information for video_id is being extracted."""
    message = '%s: Extracting video information' % video_id
    self.to_screen(message)
def report_unavailable_format(self, video_id, format):
    """Print a notice that the given format is not available for video_id."""
    details = (video_id, format)
    self.to_screen('%s: Format %s not available' % details)
def report_rtmp_download(self):
    """Print a notice that the download will go over the RTMP protocol."""
    notice = 'RTMP download detected'
    self.to_screen(notice)
811 def _signature_cache_id(self, example_sig):
812 """ Return a string representation of a signature """
813 return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
815 def _extract_signature_function(self, video_id, player_url, example_sig):
817 r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|/base)?\.(?P<ext>[a-z]+)$',
820 raise ExtractorError('Cannot identify player %r' % player_url)
821 player_type = id_m.group('ext')
822 player_id = id_m.group('id')
824 # Read from filesystem cache
825 func_id = '%s_%s_%s' % (
826 player_type, player_id, self._signature_cache_id(example_sig))
827 assert os.path.basename(func_id) == func_id
829 cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
830 if cache_spec is not None:
831 return lambda s: ''.join(s[i] for i in cache_spec)
834 'Downloading player %s' % player_url
835 if self._downloader.params.get('verbose') else
836 'Downloading %s player %s' % (player_type, player_id)
838 if player_type == 'js':
839 code = self._download_webpage(
840 player_url, video_id,
842 errnote='Download of %s failed' % player_url)
843 res = self._parse_sig_js(code)
844 elif player_type == 'swf':
845 urlh = self._request_webpage(
846 player_url, video_id,
848 errnote='Download of %s failed' % player_url)
850 res = self._parse_sig_swf(code)
852 assert False, 'Invalid player type %r' % player_type
854 test_string = ''.join(map(compat_chr, range(len(example_sig))))
855 cache_res = res(test_string)
856 cache_spec = [ord(c) for c in cache_res]
858 self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
861 def _print_sig_code(self, func, example_sig):
862 def gen_sig_code(idxs):
863 def _genslice(start, end, step):
864 starts = '' if start == 0 else str(start)
865 ends = (':%d' % (end + step)) if end + step >= 0 else ':'
866 steps = '' if step == 1 else (':%d' % step)
867 return 's[%s%s%s]' % (starts, ends, steps)
870 # Quelch pyflakes warnings - start will be set when step is set
871 start = '(Never used)'
872 for i, prev in zip(idxs[1:], idxs[:-1]):
876 yield _genslice(start, prev, step)
879 if i - prev in [-1, 1]:
888 yield _genslice(start, i, step)
890 test_string = ''.join(map(compat_chr, range(len(example_sig))))
891 cache_res = func(test_string)
892 cache_spec = [ord(c) for c in cache_res]
893 expr_code = ' + '.join(gen_sig_code(cache_spec))
894 signature_id_tuple = '(%s)' % (
895 ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
896 code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
897 ' return %s\n') % (signature_id_tuple, expr_code)
898 self.to_screen('Extracted signature function:\n' + code)
def _parse_sig_js(self, jscode):
    """Build a signature function from the JS player source in jscode.

    The name of the entry function is located with a regex, the function is
    extracted through JSInterpreter, and a one-argument callable wrapping it
    is returned.
    """
    sig_func_name = self._search_regex(
        r'\.sig\|\|([a-zA-Z0-9$]+)\(', jscode,
        'Initial JS player signature function name')
    decipher = JSInterpreter(jscode).extract_function(sig_func_name)

    def apply_to(s):
        # The extracted function receives its arguments as a list.
        return decipher([s])

    return apply_to
def _parse_sig_swf(self, file_contents):
    """Build a signature function from the SWF player data in file_contents.

    The 'decipher' function of the 'SignatureDecipher' class is extracted
    through SWFInterpreter, and a one-argument callable wrapping it is
    returned.
    """
    TARGET_CLASSNAME = 'SignatureDecipher'
    interpreter = SWFInterpreter(file_contents)
    decipher_class = interpreter.extract_class(TARGET_CLASSNAME)
    decipher = interpreter.extract_function(decipher_class, 'decipher')

    def apply_to(s):
        # The extracted function receives its arguments as a list.
        return decipher([s])

    return apply_to
916 def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
917 """Turn the encrypted s field into a working signature"""
919 if player_url is None:
920 raise ExtractorError('Cannot decrypt signature without player_url')
922 if player_url.startswith('//'):
923 player_url = 'https:' + player_url
925 player_id = (player_url, self._signature_cache_id(s))
926 if player_id not in self._player_cache:
927 func = self._extract_signature_function(
928 video_id, player_url, s
930 self._player_cache[player_id] = func
931 func = self._player_cache[player_id]
932 if self._downloader.params.get('youtube_print_sig_code'):
933 self._print_sig_code(func, s)
935 except Exception as e:
936 tb = traceback.format_exc()
937 raise ExtractorError(
938 'Signature extraction failed: ' + tb, cause=e)
940 def _get_subtitles(self, video_id, webpage):
942 subs_doc = self._download_xml(
943 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
944 video_id, note=False)
945 except ExtractorError as err:
946 self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err))
950 for track in subs_doc.findall('track'):
951 lang = track.attrib['lang_code']
952 if lang in sub_lang_list:
955 for ext in self._SUBTITLE_FORMATS:
956 params = compat_urllib_parse.urlencode({
960 'name': track.attrib['name'].encode('utf-8'),
963 'url': 'https://www.youtube.com/api/timedtext?' + params,
966 sub_lang_list[lang] = sub_formats
967 if not sub_lang_list:
968 self._downloader.report_warning('video doesn\'t have subtitles')
def _get_ytplayer_config(self, video_id, webpage):
    """Locate the ``ytplayer.config`` object in ``webpage`` and parse it.

    Returns the result of ``_parse_json`` (called with ``fatal=False``)
    on the matched text, or None when no pattern matches.
    """
    config_patterns = [
        # User data may contain arbitrary character sequences that may affect
        # JSON extraction with regex, e.g. when '};' is contained the second
        # regex won't capture the whole JSON. Yet working around by trying more
        # concrete regex first keeping in mind proper quoted string handling
        # to be implemented in future that will replace this workaround (see
        # https://github.com/rg3/youtube-dl/issues/7468,
        # https://github.com/rg3/youtube-dl/pull/7599)
        r';ytplayer\.config\s*=\s*({.+?});ytplayer',
        r';ytplayer\.config\s*=\s*({.+?});',
    ]
    raw_config = self._search_regex(
        config_patterns, webpage, 'ytplayer.config', default=None)
    if not raw_config:
        return None
    unescaped = uppercase_escape(raw_config)
    return self._parse_json(unescaped, video_id, fatal=False)
990 def _get_automatic_captions(self, video_id, webpage):
991 """We need the webpage for getting the captions url, pass it as an
992 argument to speed up the process."""
993 self.to_screen('%s: Looking for automatic captions' % video_id)
994 player_config = self._get_ytplayer_config(video_id, webpage)
995 err_msg = 'Couldn\'t find automatic captions for %s' % video_id
996 if not player_config:
997 self._downloader.report_warning(err_msg)
1000 args = player_config['args']
1001 caption_url = args.get('ttsurl')
1003 timestamp = args['timestamp']
1004 # We get the available subtitles
1005 list_params = compat_urllib_parse.urlencode({
1010 list_url = caption_url + '&' + list_params
1011 caption_list = self._download_xml(list_url, video_id)
1012 original_lang_node = caption_list.find('track')
1013 if original_lang_node is None:
1014 self._downloader.report_warning('Video doesn\'t have automatic captions')
1016 original_lang = original_lang_node.attrib['lang_code']
1017 caption_kind = original_lang_node.attrib.get('kind', '')
1020 for lang_node in caption_list.findall('target'):
1021 sub_lang = lang_node.attrib['lang_code']
1023 for ext in self._SUBTITLE_FORMATS:
1024 params = compat_urllib_parse.urlencode({
1025 'lang': original_lang,
1029 'kind': caption_kind,
1031 sub_formats.append({
1032 'url': caption_url + '&' + params,
1035 sub_lang_list[sub_lang] = sub_formats
1036 return sub_lang_list
1038 # Some videos don't provide ttsurl but rather caption_tracks and
1039 # caption_translation_languages (e.g. 20LmZk1hakA)
1040 caption_tracks = args['caption_tracks']
1041 caption_translation_languages = args['caption_translation_languages']
1042 caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
1043 parsed_caption_url = compat_urlparse.urlparse(caption_url)
1044 caption_qs = compat_parse_qs(parsed_caption_url.query)
1047 for lang in caption_translation_languages.split(','):
1048 lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang))
1049 sub_lang = lang_qs.get('lc', [None])[0]
1053 for ext in self._SUBTITLE_FORMATS:
1055 'tlang': [sub_lang],
1058 sub_url = compat_urlparse.urlunparse(parsed_caption_url._replace(
1059 query=compat_urllib_parse.urlencode(caption_qs, True)))
1060 sub_formats.append({
1064 sub_lang_list[sub_lang] = sub_formats
1065 return sub_lang_list
1066 # An extractor error can be raised by the download process if there are
1067 # no automatic captions but there are subtitles
1068 except (KeyError, ExtractorError):
1069 self._downloader.report_warning(err_msg)
1072 def _mark_watched(self, video_id, video_info):
1073 playback_url = video_info.get('videostats_playback_base_url', [None])[0]
1074 if not playback_url:
1076 parsed_playback_url = compat_urlparse.urlparse(playback_url)
1077 qs = compat_urlparse.parse_qs(parsed_playback_url.query)
1079 # cpn generation algorithm is reverse engineered from base.js.
1080 # In fact it works even with dummy cpn.
1081 CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
1082 cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
1088 playback_url = compat_urlparse.urlunparse(
1089 parsed_playback_url._replace(query=compat_urllib_parse.urlencode(qs, True)))
1091 self._download_webpage(
1092 playback_url, video_id, 'Marking watched',
1093 'Unable to mark watched', fatal=False)
def extract_id(cls, url):
    """Return the video id captured by group 2 of ``cls._VALID_URL``.

    The pattern is matched in verbose mode.  Raises ExtractorError when
    ``url`` does not match.
    """
    match = re.match(cls._VALID_URL, url, re.VERBOSE)
    if match is not None:
        return match.group(2)
    raise ExtractorError('Invalid URL: %s' % url)
def _extract_from_m3u8(self, manifest_url, video_id):
    """Download an m3u8 manifest and map each itag to its format URL.

    Every non-empty manifest line that does not start with '#' is treated
    as a format URL; its itag is taken from the ``itag/<digits>/`` path
    segment.  A later URL with the same itag replaces an earlier one.
    """
    manifest = self._download_webpage(manifest_url, video_id, 'Downloading formats manifest')
    url_map = {}
    for line in manifest.split('\n'):
        # Skip blank lines and '#'-prefixed lines; they are not URLs.
        if not line or line.startswith('#'):
            continue
        itag = self._search_regex(r'itag/(\d+?)/', line, 'itag')
        url_map[itag] = line
    return url_map
def _extract_annotations(self, video_id):
    """Download the annotations document for ``video_id`` and return it as text."""
    annotations_url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id
    return self._download_webpage(
        annotations_url, video_id,
        note='Searching for annotations.',
        errnote='Unable to download video annotations.')
1122 def _real_extract(self, url):
1123 url, smuggled_data = unsmuggle_url(url, {})
1126 'http' if self._downloader.params.get('prefer_insecure', False)
1131 parsed_url = compat_urllib_parse_urlparse(url)
1132 for component in [parsed_url.fragment, parsed_url.query]:
1133 query = compat_parse_qs(component)
1134 if start_time is None and 't' in query:
1135 start_time = parse_duration(query['t'][0])
1136 if start_time is None and 'start' in query:
1137 start_time = parse_duration(query['start'][0])
1138 if end_time is None and 'end' in query:
1139 end_time = parse_duration(query['end'][0])
1141 # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1142 mobj = re.search(self._NEXT_URL_RE, url)
1144 url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
1145 video_id = self.extract_id(url)
1148 url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
1149 video_webpage = self._download_webpage(url, video_id)
1151 # Attempt to extract SWF player URL
1152 mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
1153 if mobj is not None:
1154 player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1160 def add_dash_mpd(video_info):
1161 dash_mpd = video_info.get('dashmpd')
1162 if dash_mpd and dash_mpd[0] not in dash_mpds:
1163 dash_mpds.append(dash_mpd[0])
1166 embed_webpage = None
1168 if re.search(r'player-age-gate-content">', video_webpage) is not None:
1170 # We simulate the access to the video from www.youtube.com/v/{video_id}
1171 # this can be viewed without login into Youtube
1172 url = proto + '://www.youtube.com/embed/%s' % video_id
1173 embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
1174 data = compat_urllib_parse.urlencode({
1175 'video_id': video_id,
1176 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1177 'sts': self._search_regex(
1178 r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
1180 video_info_url = proto + '://www.youtube.com/get_video_info?' + data
1181 video_info_webpage = self._download_webpage(
1182 video_info_url, video_id,
1183 note='Refetching age-gated info webpage',
1184 errnote='unable to download video info webpage')
1185 video_info = compat_parse_qs(video_info_webpage)
1186 add_dash_mpd(video_info)
1190 # Try looking directly into the video webpage
1191 ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
1193 args = ytplayer_config['args']
1194 if args.get('url_encoded_fmt_stream_map'):
1195 # Convert to the same format returned by compat_parse_qs
1196 video_info = dict((k, [v]) for k, v in args.items())
1197 add_dash_mpd(video_info)
1198 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1200 if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
1201 # We also try looking in get_video_info since it may contain different dashmpd
1202 # URL that points to a DASH manifest with possibly different itag set (some itags
1203 # are missing from DASH manifest pointed by webpage's dashmpd, some - from DASH
1204 # manifest pointed by get_video_info's dashmpd).
1205 # The general idea is to take a union of itags of both DASH manifests (for example
1206 # video with such 'manifest behavior' see https://github.com/rg3/youtube-dl/issues/6093)
1207 self.report_video_info_webpage_download(video_id)
1208 for el_type in ['&el=info', '&el=embedded', '&el=detailpage', '&el=vevo', '']:
1210 '%s://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
1211 % (proto, video_id, el_type))
1212 video_info_webpage = self._download_webpage(
1214 video_id, note=False,
1215 errnote='unable to download video info webpage')
1216 get_video_info = compat_parse_qs(video_info_webpage)
1217 if get_video_info.get('use_cipher_signature') != ['True']:
1218 add_dash_mpd(get_video_info)
1220 video_info = get_video_info
1221 if 'token' in get_video_info:
1222 # Different get_video_info requests may report different results, e.g.
1223 # some may report video unavailability, but some may serve it without
1224 # any complaint (see https://github.com/rg3/youtube-dl/issues/7362,
1225 # the original webpage as well as el=info and el=embedded get_video_info
1226 # requests report video unavailability due to geo restriction while
1227 # el=detailpage succeeds and returns valid data). This is probably
1228 # due to YouTube measures against IP ranges of hosting providers.
1229 # Working around by preferring the first succeeded video_info containing
1230 # the token if no such video_info yet was found.
1231 if 'token' not in video_info:
1232 video_info = get_video_info
1234 if 'token' not in video_info:
1235 if 'reason' in video_info:
1236 if 'The uploader has not made this video available in your country.' in video_info['reason']:
1237 regions_allowed = self._html_search_meta('regionsAllowed', video_webpage, default=None)
1239 raise ExtractorError('YouTube said: This video is available in %s only' % (
1240 ', '.join(map(ISO3166Utils.short2full, regions_allowed.split(',')))),
1242 raise ExtractorError(
1243 'YouTube said: %s' % video_info['reason'][0],
1244 expected=True, video_id=video_id)
1246 raise ExtractorError(
1247 '"token" parameter not in video info for unknown reason',
1251 if 'title' in video_info:
1252 video_title = video_info['title'][0]
1254 self._downloader.report_warning('Unable to extract video title')
1258 video_description = get_element_by_id("eow-description", video_webpage)
1259 if video_description:
1260 video_description = re.sub(r'''(?x)
1262 (?:[a-zA-Z-]+="[^"]+"\s+)*?
1263 (?:title|href)="([^"]+)"\s+
1264 (?:[a-zA-Z-]+="[^"]+"\s+)*?
1265 class="(?:yt-uix-redirect-link|yt-uix-sessionlink[^"]*)"[^>]*>
1268 ''', r'\1', video_description)
1269 video_description = clean_html(video_description)
1271 fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
1273 video_description = unescapeHTML(fd_mobj.group(1))
1275 video_description = ''
1277 if 'multifeed_metadata_list' in video_info and not smuggled_data.get('force_singlefeed', False):
1278 if not self._downloader.params.get('noplaylist'):
1281 multifeed_metadata_list = video_info['multifeed_metadata_list'][0]
1282 for feed in multifeed_metadata_list.split(','):
1283 # Unquote should take place before split on comma (,) since textual
1284 # fields may contain comma as well (see
1285 # https://github.com/rg3/youtube-dl/issues/8536)
1286 feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
1288 '_type': 'url_transparent',
1289 'ie_key': 'Youtube',
1291 '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
1292 {'force_singlefeed': True}),
1293 'title': '%s (%s)' % (video_title, feed_data['title'][0]),
1295 feed_ids.append(feed_data['id'][0])
1297 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
1298 % (', '.join(feed_ids), video_id))
1299 return self.playlist_result(entries, video_id, video_title, video_description)
1300 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
1302 if 'view_count' in video_info:
1303 view_count = int(video_info['view_count'][0])
1307 # Check for "rental" videos
1308 if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
1309 raise ExtractorError('"rental" videos not supported')
1311 # Start extracting information
1312 self.report_information_extraction(video_id)
1315 if 'author' not in video_info:
1316 raise ExtractorError('Unable to extract uploader name')
1317 video_uploader = compat_urllib_parse_unquote_plus(video_info['author'][0])
1320 video_uploader_id = None
1321 mobj = re.search(r'<link itemprop="url" href="http://www.youtube.com/(?:user|channel)/([^"]+)">', video_webpage)
1322 if mobj is not None:
1323 video_uploader_id = mobj.group(1)
1325 self._downloader.report_warning('unable to extract uploader nickname')
1328 # We try first to get a high quality image:
1329 m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
1330 video_webpage, re.DOTALL)
1331 if m_thumb is not None:
1332 video_thumbnail = m_thumb.group(1)
1333 elif 'thumbnail_url' not in video_info:
1334 self._downloader.report_warning('unable to extract video thumbnail')
1335 video_thumbnail = None
1336 else: # don't panic if we can't find it
1337 video_thumbnail = compat_urllib_parse_unquote_plus(video_info['thumbnail_url'][0])
1340 upload_date = self._html_search_meta(
1341 'datePublished', video_webpage, 'upload date', default=None)
1343 upload_date = self._search_regex(
1344 [r'(?s)id="eow-date.*?>(.*?)</span>',
1345 r'id="watch-uploader-info".*?>.*?(?:Published|Uploaded|Streamed live|Started) on (.+?)</strong>'],
1346 video_webpage, 'upload date', default=None)
1348 upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
1349 upload_date = unified_strdate(upload_date)
1351 m_music = re.search(
1352 r'<h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*<ul[^>]*>\s*<li>(?P<title>.+?) by (?P<creator>.+?)(?:\(.+?\))?</li',
1355 video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
1356 video_creator = clean_html(m_music.group('creator'))
1358 video_alt_title = video_creator = None
1360 m_cat_container = self._search_regex(
1361 r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
1362 video_webpage, 'categories', default=None)
1364 category = self._html_search_regex(
1365 r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
1367 video_categories = None if category is None else [category]
1369 video_categories = None
1372 unescapeHTML(m.group('content'))
1373 for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
1375 def _extract_count(count_name):
1376 return str_to_int(self._search_regex(
1377 r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>'
1378 % re.escape(count_name),
1379 video_webpage, count_name, default=None))
1381 like_count = _extract_count('like')
1382 dislike_count = _extract_count('dislike')
1385 video_subtitles = self.extract_subtitles(video_id, video_webpage)
1386 automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
1388 if 'length_seconds' not in video_info:
1389 self._downloader.report_warning('unable to extract video duration')
1390 video_duration = None
1392 video_duration = int(compat_urllib_parse_unquote_plus(video_info['length_seconds'][0]))
1395 video_annotations = None
1396 if self._downloader.params.get('writeannotations', False):
1397 video_annotations = self._extract_annotations(video_id)
1399 def _map_to_format_list(urlmap):
1401 for itag, video_real_url in urlmap.items():
1404 'url': video_real_url,
1405 'player_url': player_url,
1407 if itag in self._formats:
1408 dct.update(self._formats[itag])
1412 if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1413 self.report_rtmp_download()
1415 'format_id': '_rtmp',
1417 'url': video_info['conn'][0],
1418 'player_url': player_url,
1420 elif len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1:
1421 encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
1422 if 'rtmpe%3Dyes' in encoded_url_map:
1423 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
1424 fmt_list = video_info.get('fmt_list', [''])[0]
1426 for fmt in fmt_list.split(','):
1427 spec = fmt.split('/')
1428 width, height = spec[1].split('x')
1429 self._formats[spec[0]].update({
1430 'resolution': spec[1],
1431 'width': int_or_none(width),
1432 'height': int_or_none(height),
1435 for url_data_str in encoded_url_map.split(','):
1436 url_data = compat_parse_qs(url_data_str)
1437 if 'itag' not in url_data or 'url' not in url_data:
1439 format_id = url_data['itag'][0]
1440 url = url_data['url'][0]
1442 if 'sig' in url_data:
1443 url += '&signature=' + url_data['sig'][0]
1444 elif 's' in url_data:
1445 encrypted_sig = url_data['s'][0]
1446 ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
1448 jsplayer_url_json = self._search_regex(
1450 embed_webpage if age_gate else video_webpage,
1451 'JS player URL (1)', default=None)
1452 if not jsplayer_url_json and not age_gate:
1453 # We need the embed website after all
1454 if embed_webpage is None:
1455 embed_url = proto + '://www.youtube.com/embed/%s' % video_id
1456 embed_webpage = self._download_webpage(
1457 embed_url, video_id, 'Downloading embed webpage')
1458 jsplayer_url_json = self._search_regex(
1459 ASSETS_RE, embed_webpage, 'JS player URL')
1461 player_url = json.loads(jsplayer_url_json)
1462 if player_url is None:
1463 player_url_json = self._search_regex(
1464 r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
1465 video_webpage, 'age gate player URL')
1466 player_url = json.loads(player_url_json)
1468 if self._downloader.params.get('verbose'):
1469 if player_url is None:
1470 player_version = 'unknown'
1471 player_desc = 'unknown'
1473 if player_url.endswith('swf'):
1474 player_version = self._search_regex(
1475 r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
1476 'flash player', fatal=False)
1477 player_desc = 'flash player %s' % player_version
1479 player_version = self._search_regex(
1480 [r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js', r'(?:www|player)-([^/]+)/base\.js'],
1482 'html5 player', fatal=False)
1483 player_desc = 'html5 player %s' % player_version
1485 parts_sizes = self._signature_cache_id(encrypted_sig)
1486 self.to_screen('{%s} signature length %s, %s' %
1487 (format_id, parts_sizes, player_desc))
1489 signature = self._decrypt_signature(
1490 encrypted_sig, video_id, player_url, age_gate)
1491 url += '&signature=' + signature
1492 if 'ratebypass' not in url:
1493 url += '&ratebypass=yes'
1496 'format_id': format_id,
1498 'player_url': player_url,
1500 if format_id in self._formats:
1501 dct.update(self._formats[format_id])
1503 # Some itags are not included in DASH manifest thus corresponding formats will
1504 # lack metadata (see https://github.com/rg3/youtube-dl/pull/5993).
1505 # Trying to extract metadata from url_encoded_fmt_stream_map entry.
1506 mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
1507 width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
1510 'filesize': int_or_none(url_data.get('clen', [None])[0]),
1511 'tbr': float_or_none(url_data.get('bitrate', [None])[0], 1000),
1514 'fps': int_or_none(url_data.get('fps', [None])[0]),
1515 'format_note': url_data.get('quality_label', [None])[0] or url_data.get('quality', [None])[0],
1517 for key, value in more_fields.items():
1520 type_ = url_data.get('type', [None])[0]
1522 type_split = type_.split(';')
1523 kind_ext = type_split[0].split('/')
1524 if len(kind_ext) == 2:
1526 dct['ext'] = mimetype2ext(type_split[0])
1527 if kind in ('audio', 'video'):
1529 for mobj in re.finditer(
1530 r'(?P<key>[a-zA-Z_-]+)=(?P<quote>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):
1531 if mobj.group('key') == 'codecs':
1532 codecs = mobj.group('val')
1535 codecs = codecs.split(',')
1536 if len(codecs) == 2:
1537 acodec, vcodec = codecs[1], codecs[0]
1539 acodec, vcodec = (codecs[0], 'none') if kind == 'audio' else ('none', codecs[0])
1545 elif video_info.get('hlsvp'):
1546 manifest_url = video_info['hlsvp'][0]
1547 url_map = self._extract_from_m3u8(manifest_url, video_id)
1548 formats = _map_to_format_list(url_map)
1549 # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
1550 for a_format in formats:
1551 a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
1553 unavailable_message = self._html_search_regex(
1554 r'(?s)<h1[^>]+id="unavailable-message"[^>]*>(.+?)</h1>',
1555 video_webpage, 'unavailable message', default=None)
1556 if unavailable_message:
1557 raise ExtractorError(unavailable_message, expected=True)
1558 raise ExtractorError('no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
1560 # Look for the DASH manifest
1561 if self._downloader.params.get('youtube_include_dash_manifest', True):
1562 dash_mpd_fatal = True
1563 for mpd_url in dash_mpds:
1566 def decrypt_sig(mobj):
1568 dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
1569 return '/signature/%s' % dec_s
1571 mpd_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, mpd_url)
1573 for df in self._extract_mpd_formats(
1574 mpd_url, video_id, fatal=dash_mpd_fatal,
1575 formats_dict=self._formats):
1576 # Do not overwrite DASH format found in some previous DASH manifest
1577 if df['format_id'] not in dash_formats:
1578 dash_formats[df['format_id']] = df
1579 # Additional DASH manifests may end up in HTTP Error 403 therefore
1580 # allow them to fail without bug report message if we already have
1581 # some DASH manifest succeeded. This is temporary workaround to reduce
1582 # burst of bug reports until we figure out the reason and whether it
1583 # can be fixed at all.
1584 dash_mpd_fatal = False
1585 except (ExtractorError, KeyError) as e:
1586 self.report_warning(
1587 'Skipping DASH manifest: %r' % e, video_id)
1589 # Remove the formats we found through non-DASH, they
1590 # contain less info and it can be wrong, because we use
1591 # fixed values (for example the resolution). See
1592 # https://github.com/rg3/youtube-dl/issues/5774 for an
1594 formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
1595 formats.extend(dash_formats.values())
1597 # Check for malformed aspect ratio
1598 stretched_m = re.search(
1599 r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
1602 w = float(stretched_m.group('w'))
1603 h = float(stretched_m.group('h'))
1604 # yt:stretch may hold invalid ratio data (e.g. for Q39EVAstoRM ratio is 17:0).
1605 # We will only process correct ratios.
1609 if f.get('vcodec') != 'none':
1610 f['stretched_ratio'] = ratio
1612 self._sort_formats(formats)
1614 self.mark_watched(video_id, video_info)
1618 'uploader': video_uploader,
1619 'uploader_id': video_uploader_id,
1620 'upload_date': upload_date,
1621 'creator': video_creator,
1622 'title': video_title,
1623 'alt_title': video_alt_title,
1624 'thumbnail': video_thumbnail,
1625 'description': video_description,
1626 'categories': video_categories,
1628 'subtitles': video_subtitles,
1629 'automatic_captions': automatic_captions,
1630 'duration': video_duration,
1631 'age_limit': 18 if age_gate else 0,
1632 'annotations': video_annotations,
1633 'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
1634 'view_count': view_count,
1635 'like_count': like_count,
1636 'dislike_count': dislike_count,
1637 'average_rating': float_or_none(video_info.get('avg_rating', [None])[0]),
1640 'start_time': start_time,
1641 'end_time': end_time,
1645 class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
1646 IE_DESC = 'YouTube.com playlists'
1647 _VALID_URL = r"""(?x)(?:
1652 (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/videoseries)
1653 \? (?:.*?[&;])*? (?:p|a|list)=
1657 (?:PL|LL|EC|UU|FL|RD|UL)?[0-9A-Za-z-_]{10,}
1658 # Top tracks, they can also include dots
1663 ((?:PL|LL|EC|UU|FL|RD|UL)[0-9A-Za-z-_]{10,})
1665 _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
1666 _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&[^"]*?index=(?P<index>\d+)(?:[^>]+>(?P<title>[^<]+))?'
1667 IE_NAME = 'youtube:playlist'
1669 'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
1671 'title': 'ytdl test PL',
1672 'id': 'PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
1674 'playlist_count': 3,
1676 'url': 'https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
1678 'id': 'PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
1679 'title': 'YDL_Empty_List',
1681 'playlist_count': 0,
1683 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
1684 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
1686 'title': '29C3: Not my department',
1687 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
1689 'playlist_count': 95,
1691 'note': 'issue #673',
1692 'url': 'PLBB231211A4F62143',
1694 'title': '[OLD]Team Fortress 2 (Class-based LP)',
1695 'id': 'PLBB231211A4F62143',
1697 'playlist_mincount': 26,
1699 'note': 'Large playlist',
1700 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
1702 'title': 'Uploads from Cauchemar',
1703 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
1705 'playlist_mincount': 799,
1707 'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
1709 'title': 'YDL_safe_search',
1710 'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
1712 'playlist_count': 2,
1715 'url': 'http://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
1716 'playlist_count': 4,
1719 'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
1722 'note': 'Embedded SWF player',
1723 'url': 'http://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
1724 'playlist_count': 4,
1727 'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ',
1730 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
1731 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
1733 'title': 'Uploads from Interstellar Movie',
1734 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
1736 'playlist_mincout': 21,
1739 def _real_initialize(self):
def _extract_mix(self, playlist_id):
    """Extract a mix playlist by scraping the watch page of its seed video.

    The playlist title is taken from the first non-empty element among the
    classes 'playlist-title', 'title long-title' and 'title'; the entries
    are the unique video ids linked together with this playlist id.
    """
    # The mixes are generated from a single video
    # the id of the playlist is just 'RD' + video_id
    seed_video_id = playlist_id[-11:]
    webpage = self._download_webpage(
        'https://youtube.com/watch?v=%s&list=%s' % (seed_video_id, playlist_id),
        playlist_id, 'Downloading Youtube mix')

    # Equivalent to chaining the lookups with `or`: stop at the first
    # truthy element, otherwise keep the result of the last lookup.
    title_span = None
    for class_name in ('playlist-title', 'title long-title', 'title'):
        title_span = get_element_by_attribute('class', class_name, webpage)
        if title_span:
            break
    title = clean_html(title_span)

    mix_entry_re = r'''(?xs)data-video-username=".*?".*?
               href="/watch\?v=([0-9A-Za-z_-]{11})&[^"]*?list=%s''' % re.escape(playlist_id)
    ids = orderedSet(re.findall(mix_entry_re, webpage))

    return self.playlist_result(self._ids_to_results(ids), playlist_id, title)
1762 def _extract_playlist(self, playlist_id):
1763 url = self._TEMPLATE_URL % playlist_id
1764 page = self._download_webpage(url, playlist_id)
1766 for match in re.findall(r'<div class="yt-alert-message">([^<]+)</div>', page):
1767 match = match.strip()
1768 # Check if the playlist exists or is private
1769 if re.match(r'[^<]*(The|This) playlist (does not exist|is private)[^<]*', match):
1770 raise ExtractorError(
1771 'The playlist doesn\'t exist or is private, use --username or '
1772 '--netrc to access it.',
1774 elif re.match(r'[^<]*Invalid parameters[^<]*', match):
1775 raise ExtractorError(
1776 'Invalid parameters. Maybe URL is incorrect.',
1778 elif re.match(r'[^<]*Choose your language[^<]*', match):
1781 self.report_warning('Youtube gives an alert message: ' + match)
1783 playlist_title = self._html_search_regex(
1784 r'(?s)<h1 class="pl-header-title[^"]*"[^>]*>\s*(.*?)\s*</h1>',
1787 return self.playlist_result(self._entries(page, playlist_id), playlist_id, playlist_title)
def _check_download_just_video(self, url, playlist_id):
    """Decide whether a playlist URL should yield only its single video.

    Returns a url_result for the video when ``url`` carries a ``v`` query
    parameter and the 'noplaylist' option is set; returns None otherwise.
    """
    # Check if it's a video-specific URL
    query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
    if 'v' not in query:
        return None
    video_id = query['v'][0]
    if not self._downloader.params.get('noplaylist'):
        self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
        return None
    self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
    return self.url_result(video_id, 'Youtube', video_id=video_id)
def _real_extract(self, url):
    """Extract a playlist, a mix, or a single video from a playlist URL.

    Raises ExtractorError when ``url`` does not match ``_VALID_URL``.
    """
    # Extract playlist id
    match = re.match(self._VALID_URL, url)
    if match is None:
        raise ExtractorError('Invalid URL: %s' % url)
    playlist_id = match.group(1) or match.group(2)

    # A single-video result takes precedence over the playlist itself.
    single_video = self._check_download_just_video(url, playlist_id)
    if single_video:
        return single_video

    # Mixes require a custom extraction process
    if playlist_id.startswith(('RD', 'UL')):
        return self._extract_mix(playlist_id)
    return self._extract_playlist(playlist_id)
1818 class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
1819 IE_DESC = 'YouTube.com channels'
1820 _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/(?P<id>[0-9A-Za-z_-]+)'
1821 _TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos'
1822 _VIDEO_RE = r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?'
1823 IE_NAME = 'youtube:channel'
1825 'note': 'paginated channel',
1826 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
1827 'playlist_mincount': 91,
1829 'id': 'UUKfVa3S1e4PHvxWcwyMMg8w',
1830 'title': 'Uploads from lex will',
1833 'note': 'Age restricted channel',
1834 # from https://www.youtube.com/user/DeusExOfficial
1835 'url': 'https://www.youtube.com/channel/UCs0ifCMCm1icqRbqhUINa0w',
1836 'playlist_mincount': 64,
1838 'id': 'UUs0ifCMCm1icqRbqhUINa0w',
1839 'title': 'Uploads from Deus Ex',
def suitable(cls, url):
    """Decline URLs that YoutubePlaylistsIE accepts; otherwise defer to the parent check."""
    if YoutubePlaylistsIE.suitable(url):
        return False
    return super(YoutubeChannelIE, cls).suitable(url)
1847 def _real_extract(self, url):
1848 channel_id = self._match_id(url)
1850 url = self._TEMPLATE_URL % channel_id
1852 # Channel by page listing is restricted to 35 pages of 30 items, i.e. 1050 videos total (see #5778)
1853 # Workaround by extracting as a playlist if managed to obtain channel playlist URL
1854 # otherwise fallback on channel by page extraction
1855 channel_page = self._download_webpage(
1856 url + '?view=57', channel_id,
1857 'Downloading channel page', fatal=False)
1858 if channel_page is False:
1859 channel_playlist_id = False
1861 channel_playlist_id = self._html_search_meta(
1862 'channelId', channel_page, 'channel id', default=None)
1863 if not channel_playlist_id:
1864 channel_playlist_id = self._search_regex(
1865 r'data-(?:channel-external-|yt)id="([^"]+)"',
1866 channel_page, 'channel id', default=None)
1867 if channel_playlist_id and channel_playlist_id.startswith('UC'):
1868 playlist_id = 'UU' + channel_playlist_id[2:]
1869 return self.url_result(
1870 compat_urlparse.urljoin(url, '/playlist?list=%s' % playlist_id), 'YoutubePlaylist')
1872 channel_page = self._download_webpage(url, channel_id, 'Downloading page #1')
1873 autogenerated = re.search(r'''(?x)
1875 channel-header-autogenerated-label|
1876 yt-channel-title-autogenerated
1877 )[^"]*"''', channel_page) is not None
1880 # The videos are contained in a single page
1881 # the ajax pages can't be used, they are empty
1884 video_id, 'Youtube', video_id=video_id,
1885 video_title=video_title)
1886 for video_id, video_title in self.extract_videos_from_page(channel_page)]
1887 return self.playlist_result(entries, channel_id)
1889 return self.playlist_result(self._entries(channel_page, channel_id), channel_id)
class YoutubeUserIE(YoutubeChannelIE):
    """Extractor for YouTube user pages, reusing the channel extraction logic."""

    IE_NAME = 'youtube:user'
    IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/user/%s/videos'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheLinuxFoundation',
        'playlist_mincount': 320,
        'info_dict': {
            'title': 'TheLinuxFoundation',
        }
    }, {
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        # Don't return True if the url can be extracted with another youtube
        # extractor: the regex is too permissive and it would match anyway.
        for name, klass in globals().items():
            if name.endswith('IE') and klass is not cls and klass.suitable(url):
                return False
        return super(YoutubeUserIE, cls).suitable(url)
class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
    """Extractor for the /playlists page of a YouTube user or channel.

    Only declares the URL pattern and test cases; no extraction method is
    defined in this class itself.
    """

    IE_NAME = 'youtube:playlists'
    IE_DESC = 'YouTube.com user/channel playlists'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/(?:user|channel)/(?P<id>[^/]+)/playlists'

    _TESTS = [{
        'url': 'http://www.youtube.com/user/ThirstForScience/playlists',
        'playlist_mincount': 4,
        'info_dict': {
            'id': 'ThirstForScience',
            'title': 'Thirst for Science',
        },
    }, {
        # with "Load more" button
        'url': 'http://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
        'playlist_mincount': 70,
        'info_dict': {
            'id': 'igorkle1',
            'title': 'Игорь Клейнер',
        },
    }, {
        'url': 'https://www.youtube.com/channel/UCiU1dHvZObB2iP6xkJ__Icw/playlists',
        'playlist_mincount': 17,
        'info_dict': {
            'id': 'UCiU1dHvZObB2iP6xkJ__Icw',
            'title': 'Chem Player',
        },
    }]
class YoutubeSearchIE(SearchInfoExtractor, YoutubePlaylistIE):
    """Search extractor behind the 'ytsearch' keyword.

    Subclasses can add parameters to the search request by overriding
    _EXTRA_QUERY_ARGS.
    """

    IE_DESC = 'YouTube.com searches'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    _EXTRA_QUERY_ARGS = {}
    _TESTS = []

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query

        Result pages are downloaded one after another until a page yields no
        video links or at least n results have been collected; the collected
        list is then cut down to n entries.

        Raises ExtractorError when a result page contains a search message
        instead of results.
        """

        videos = []

        for pagenum in itertools.count(1):
            url_query = {
                'search_query': query.encode('utf-8'),
                'page': pagenum,
                'spf': 'navigate',
            }
            url_query.update(self._EXTRA_QUERY_ARGS)
            result_url = 'https://www.youtube.com/results?' + compat_urllib_parse.urlencode(url_query)
            data = self._download_json(
                result_url, video_id='query "%s"' % query,
                note='Downloading page %s' % pagenum,
                errnote='Unable to download API page')
            html_content = data[1]['body']['content']

            if 'class="search-message' in html_content:
                raise ExtractorError(
                    '[youtube] No video results', expected=True)

            new_videos = self._ids_to_results(orderedSet(re.findall(
                r'href="/watch\?v=(.{11})', html_content)))
            videos += new_videos
            # Stop as soon as enough results are available: '>=' avoids
            # requesting one more page when exactly n results were collected
            if not new_videos or len(videos) >= n:
                break

        if len(videos) > n:
            videos = videos[:n]
        return self.playlist_result(videos, query)
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Variant of YoutubeSearchIE that adds a 'search_sort' query argument."""

    IE_DESC = 'YouTube.com searches, newest videos first'
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    _SEARCH_KEY = 'ytsearchdate'
    # Merged into the query of every search request by the parent class
    _EXTRA_QUERY_ARGS = {'search_sort': 'video_date_uploaded'}
class YoutubeSearchURLIE(InfoExtractor):
    """Extractor for YouTube result page URLs (/results?search_query=...)."""

    IE_NAME = 'youtube:search_url'
    IE_DESC = 'YouTube.com search URLs'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a playlist with one 'url' entry per result on the page."""
        query = compat_urllib_parse_unquote_plus(
            re.match(self._VALID_URL, url).group('query'))

        webpage = self._download_webpage(url, query)
        # Only the markup of the result list is searched for items
        result_code = self._search_regex(
            r'(?s)<ol[^>]+class="item-section"(.*?)</ol>', webpage, 'result HTML')

        entries = []
        for part_code in re.findall(
                r'(?s)<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*>(.*?)</h3>', result_code):
            # A missing title is tolerated, a missing link is not
            part_title = self._html_search_regex(
                [r'(?s)title="([^"]+)"', r'>([^<]+)</a>'], part_code, 'item title', fatal=False)
            part_url_snippet = self._html_search_regex(
                r'(?s)href="([^"]+)"', part_code, 'item URL')
            entries.append({
                '_type': 'url',
                'url': compat_urlparse.urljoin(
                    'https://www.youtube.com/', part_url_snippet),
                'title': part_title,
            })

        return {
            '_type': 'playlist',
            'entries': entries,
            'title': query,
        }
class YoutubeShowIE(YoutubePlaylistsBaseInfoExtractor):
    """Extractor for /show/<id> URLs, handled via the show's /playlists page."""

    IE_NAME = 'youtube:show'
    IE_DESC = 'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://www\.youtube\.com/show/(?P<id>[^?#]*)'
    _TESTS = [{
        'url': 'https://www.youtube.com/show/airdisasters',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'airdisasters',
            'title': 'Air Disasters',
        }
    }]

    def _real_extract(self, url):
        # Hand the show's playlists page over to the base class extraction
        show_id = self._match_id(url)
        playlists_url = 'https://www.youtube.com/show/%s/playlists' % show_id
        return super(YoutubeShowIE, self)._real_extract(playlists_url)
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.

    _FEED_NAME is used both in the feed URL and in IE_NAME, _PLAYLIST_TITLE
    becomes the title of the resulting playlist.
    """
    # Feeds are per-account, so login info is mandatory
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        return 'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        self._login()

    def _real_extract(self, url):
        """Collect the video ids of the feed and return them as a playlist."""
        page = self._download_webpage(
            'https://www.youtube.com/feed/%s' % self._FEED_NAME, self._PLAYLIST_TITLE)

        # The extraction process is the same as for playlists, but the regex
        # for the video ids doesn't contain an index
        ids = []
        more_widget_html = content_html = page
        for page_num in itertools.count(1):
            matches = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)

            # 'recommended' feed has infinite 'load more' and each new portion spins
            # the same videos in (sometimes) slightly different order, so we'll check
            # for unicity and break when portion has no new videos.
            # This must be a real list: on Python 3 filter() returns an
            # iterator that is always truthy, which would disable the check.
            new_ids = [
                video_id for video_id in orderedSet(matches)
                if video_id not in ids]
            if not new_ids:
                break

            ids.extend(new_ids)

            mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
            if not mobj:
                # No "load more" link: this was the last portion
                break

            more = self._download_json(
                'https://youtube.com/%s' % mobj.group('more'), self._PLAYLIST_TITLE,
                'Downloading page #%s' % page_num,
                transform_source=uppercase_escape)
            content_html = more['content_html']
            more_widget_html = more['load_more_widget_html']

        return self.playlist_result(
            self._ids_to_results(ids), playlist_title=self._PLAYLIST_TITLE)
class YoutubeWatchLaterIE(YoutubePlaylistIE):
    """Extractor for the watch later list, i.e. the playlist with id 'WL'."""

    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    IE_NAME = 'youtube:watchlater'
    _VALID_URL = r'https?://www\.youtube\.com/(?:feed/watch_later|(?:playlist|watch)\?(?:.+&)?list=WL)|:ytwatchlater'

    _TESTS = [{
        'url': 'https://www.youtube.com/playlist?list=WL',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?v=bCNU9TrbiRk&index=1&list=WL',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # A truthy result from the helper is returned as is (presumably a
        # single-video result -- the helper is defined outside this class);
        # otherwise the whole 'WL' playlist is extracted.
        return self._check_download_just_video(url, 'WL') or self._extract_playlist('WL')
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Resolve the logged-in user's favourites page to its playlist."""

    IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
    IE_NAME = 'youtube:favorites'
    _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
    _LOGIN_REQUIRED = True

    def _real_extract(self, url):
        favourites_page = self._download_webpage(
            'https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        # The page carries the playlist id in a "list=" parameter
        favourites_list_id = self._search_regex(
            r'list=(.+?)["&]', favourites_page, 'favourites playlist id')
        return self.url_result(favourites_list_id, 'YoutubePlaylist')
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for https://www.youtube.com/feed/recommended."""

    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = 'Youtube Recommended videos'
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?'
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for https://www.youtube.com/feed/subscriptions."""

    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = 'Youtube Subscriptions'
    IE_DESC = 'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for https://www.youtube.com/feed/history."""

    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
    # Raw string: '\.' is not a valid escape sequence in a plain string
    # literal; the resulting pattern is unchanged.
    _VALID_URL = r'https?://www\.youtube\.com/feed/history|:ythistory'
    _FEED_NAME = 'history'
    _PLAYLIST_TITLE = 'Youtube History'
class YoutubeTruncatedURLIE(InfoExtractor):
    """Match watch/attribution URLs that lack a video id and report an error.

    Extraction always fails with a hint about quoting the URL.
    """

    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'http://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'http://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Such URLs never lead to a video, so explain the likely cause
        quoting_hint = (
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like youtube-dl '
            '"http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply youtube-dl BaW_jenozKc .')
        raise ExtractorError(quoting_hint, expected=True)
2218 class YoutubeTruncatedIDIE(InfoExtractor):
2219 IE_NAME = 'youtube:truncated_id'
2220 IE_DESC = False # Do not list
2221 _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'
2224 'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
2225 'only_matching': True,
2228 def _real_extract(self, url):
2229 video_id = self._match_id(url)
2230 raise ExtractorError(
2231 'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url),