3 from __future__ import unicode_literals
13 from .common import InfoExtractor, SearchInfoExtractor
14 from ..jsinterp import JSInterpreter
15 from ..swfinterp import SWFInterpreter
16 from ..compat import (
20 compat_urllib_parse_unquote,
21 compat_urllib_parse_unquote_plus,
22 compat_urllib_parse_urlparse,
31 get_element_by_attribute,
49 class YoutubeBaseInfoExtractor(InfoExtractor):
50 """Provide base functions for Youtube extractors"""
51 _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
52 _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'
53 _NETRC_MACHINE = 'youtube'
54 # If True it will raise an error if no login info is provided
55 _LOGIN_REQUIRED = False
57 def _set_language(self):
59 '.youtube.com', 'PREF', 'f1=50000000&hl=en',
60 # YouTube sets the expire time to about two months
61 expire_time=time.time() + 2 * 30 * 24 * 3600)
63 def _ids_to_results(self, ids):
65 self.url_result(vid_id, 'Youtube', video_id=vid_id)
70 Attempt to log in to YouTube.
71 True is returned if successful or skipped.
72 False is returned if login failed.
74 If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
76 (username, password) = self._get_login_info()
77 # No authentication to be performed
79 if self._LOGIN_REQUIRED:
80 raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
83 login_page = self._download_webpage(
84 self._LOGIN_URL, None,
85 note='Downloading login page',
86 errnote='unable to fetch login page', fatal=False)
87 if login_page is False:
90 galx = self._search_regex(r'(?s)<input.+?name="GALX".+?value="(.+?)"',
91 login_page, 'Login GALX parameter')
95 'continue': 'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
100 'PersistentCookie': 'yes',
102 'bgresponse': 'js_disabled',
103 'checkConnection': '',
104 'checkedDomains': 'youtube',
111 'service': 'youtube',
116 login_data = compat_urllib_parse.urlencode(encode_dict(login_form_strs)).encode('ascii')
118 req = sanitized_Request(self._LOGIN_URL, login_data)
119 login_results = self._download_webpage(
121 note='Logging in', errnote='unable to log in', fatal=False)
122 if login_results is False:
125 if re.search(r'id="errormsg_0_Passwd"', login_results) is not None:
126 raise ExtractorError('Please use your account password and a two-factor code instead of an application-specific password.', expected=True)
129 # TODO add SMS and phone call support - these require making a request and then prompting the user
131 if re.search(r'(?i)<form[^>]* id="challenge"', login_results) is not None:
132 tfa_code = self._get_tfa_info('2-step verification code')
135 self._downloader.report_warning(
136 'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
137 '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
140 tfa_code = remove_start(tfa_code, 'G-')
142 tfa_form_strs = self._form_hidden_inputs('challenge', login_results)
144 tfa_form_strs.update({
149 tfa_data = compat_urllib_parse.urlencode(encode_dict(tfa_form_strs)).encode('ascii')
151 tfa_req = sanitized_Request(self._TWOFACTOR_URL, tfa_data)
152 tfa_results = self._download_webpage(
154 note='Submitting TFA code', errnote='unable to submit tfa', fatal=False)
156 if tfa_results is False:
159 if re.search(r'(?i)<form[^>]* id="challenge"', tfa_results) is not None:
160 self._downloader.report_warning('Two-factor code expired or invalid. Please try again, or use a one-use backup code instead.')
162 if re.search(r'(?i)<form[^>]* id="gaia_loginform"', tfa_results) is not None:
163 self._downloader.report_warning('unable to log in - did the page structure change?')
165 if re.search(r'smsauth-interstitial-reviewsettings', tfa_results) is not None:
166 self._downloader.report_warning('Your Google account has a security notice. Please log in on your web browser, resolve the notice, and try again.')
169 if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
170 self._downloader.report_warning('unable to log in: bad username or password')
174 def _real_initialize(self):
175 if self._downloader is None:
178 if not self._login():
182 class YoutubeEntryListBaseInfoExtractor(InfoExtractor):
183 # Extract entries from page with "Load more" button
184 def _entries(self, page, playlist_id):
185 more_widget_html = content_html = page
186 for page_num in itertools.count(1):
187 for entry in self._process_page(content_html):
190 mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
194 more = self._download_json(
195 'https://youtube.com/%s' % mobj.group('more'), playlist_id,
196 'Downloading page #%s' % page_num,
197 transform_source=uppercase_escape)
198 content_html = more['content_html']
199 if not content_html.strip():
200 # Some webpages show a "Load more" button but they don't
203 more_widget_html = more['load_more_widget_html']
206 class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
207 def _process_page(self, content):
208 for video_id, video_title in self.extract_videos_from_page(content):
209 yield self.url_result(video_id, 'Youtube', video_id, video_title)
211 def extract_videos_from_page(self, page):
214 for mobj in re.finditer(self._VIDEO_RE, page):
215 # The link with index 0 is not the first video of the playlist (not sure if this is still the case)
216 if 'index' in mobj.groupdict() and mobj.group('id') == '0':
218 video_id = mobj.group('id')
219 video_title = unescapeHTML(mobj.group('title'))
221 video_title = video_title.strip()
223 idx = ids_in_page.index(video_id)
224 if video_title and not titles_in_page[idx]:
225 titles_in_page[idx] = video_title
227 ids_in_page.append(video_id)
228 titles_in_page.append(video_title)
229 return zip(ids_in_page, titles_in_page)
class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
    """Base extractor for pages whose entries are links to playlists."""

    def _process_page(self, content):
        """Yield a 'YoutubePlaylist' url_result for each playlist link.

        Every ``href`` of the form ``playlist?list=<id>`` (with an optional
        leading slash) found in *content* produces one result, in page order.
        """
        playlist_ids = re.findall(r'href="/?playlist\?list=(.+?)"', content)
        for found_id in playlist_ids:
            playlist_url = 'https://www.youtube.com/playlist?list=%s' % found_id
            yield self.url_result(playlist_url, 'YoutubePlaylist')

    def _real_extract(self, url):
        """Download *url* and return its entries as a playlist result.

        The playlist id comes from ``self._match_id``; the title is read
        non-fatally via ``self._og_search_title``; the entries are produced
        by the inherited ``_entries`` generator.
        """
        list_id = self._match_id(url)
        page = self._download_webpage(url, list_id)
        page_title = self._og_search_title(page, fatal=False)
        entries = self._entries(page, list_id)
        return self.playlist_result(entries, list_id, page_title)
245 class YoutubeIE(YoutubeBaseInfoExtractor):
246 IE_DESC = 'YouTube.com'
247 _VALID_URL = r"""(?x)^
249 (?:https?://|//) # http(s):// or protocol-independent URL
250 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|
251 (?:www\.)?deturl\.com/www\.youtube\.com/|
252 (?:www\.)?pwnyoutube\.com/|
253 (?:www\.)?yourepeat\.com/|
254 tube\.majestyc\.net/|
255 youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
256 (?:.*?\#/)? # handle anchor (#/) redirect urls
257 (?: # the various things that can precede the ID:
258 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
259 |(?: # or the v= param in all its forms
260 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
261 (?:\?|\#!?) # the params delimiter ? or # or #!
262 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&v=V36LpHqtcDY)
267 youtu\.be| # just youtu.be/xxxx
268 vid\.plus # or vid.plus/xxxx
270 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
272 )? # all until now is optional -> you can pass the naked ID
273 ([0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
274 (?!.*?&list=) # combined list/video URLs are handled by the playlist IE
275 (?(1).+)? # if we found the ID, everything can follow
277 _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
279 '5': {'ext': 'flv', 'width': 400, 'height': 240},
280 '6': {'ext': 'flv', 'width': 450, 'height': 270},
281 '13': {'ext': '3gp'},
282 '17': {'ext': '3gp', 'width': 176, 'height': 144},
283 '18': {'ext': 'mp4', 'width': 640, 'height': 360},
284 '22': {'ext': 'mp4', 'width': 1280, 'height': 720},
285 '34': {'ext': 'flv', 'width': 640, 'height': 360},
286 '35': {'ext': 'flv', 'width': 854, 'height': 480},
287 '36': {'ext': '3gp', 'width': 320, 'height': 240},
288 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080},
289 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072},
290 '43': {'ext': 'webm', 'width': 640, 'height': 360},
291 '44': {'ext': 'webm', 'width': 854, 'height': 480},
292 '45': {'ext': 'webm', 'width': 1280, 'height': 720},
293 '46': {'ext': 'webm', 'width': 1920, 'height': 1080},
294 '59': {'ext': 'mp4', 'width': 854, 'height': 480},
295 '78': {'ext': 'mp4', 'width': 854, 'height': 480},
299 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'preference': -20},
300 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'preference': -20},
301 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'preference': -20},
302 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'preference': -20},
303 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'preference': -20},
304 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'preference': -20},
305 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'preference': -20},
307 # Apple HTTP Live Streaming
308 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'preference': -10},
309 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'preference': -10},
310 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'preference': -10},
311 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'preference': -10},
312 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'preference': -10},
313 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'preference': -10},
314 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'preference': -10},
317 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
318 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
319 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
320 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
321 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
322 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, # Height can vary (https://github.com/rg3/youtube-dl/issues/4559)
323 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
324 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
325 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'h264'},
326 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'h264'},
327 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'vcodec': 'h264'},
330 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 48, 'preference': -50, 'container': 'm4a_dash'},
331 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 128, 'preference': -50, 'container': 'm4a_dash'},
332 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 256, 'preference': -50, 'container': 'm4a_dash'},
335 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
336 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
337 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
338 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
339 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
340 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
341 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'container': 'webm', 'vcodec': 'vp9'},
342 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
343 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
344 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
345 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
346 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
347 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
348 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
349 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
350 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
351 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
352 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'vp9'},
353 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'vp9'},
354 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'vp9'},
355 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'vcodec': 'vp9'},
356 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'vp9'},
359 '171': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 128, 'preference': -50},
360 '172': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 256, 'preference': -50},
362 # Dash webm audio with opus inside
363 '249': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50, 'preference': -50},
364 '250': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70, 'preference': -50},
365 '251': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160, 'preference': -50},
368 '_rtmp': {'protocol': 'rtmp'},
374 'url': 'http://www.youtube.com/watch?v=BaW_jenozKcj&t=1s&end=9',
378 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
379 'uploader': 'Philipp Hagemeister',
380 'uploader_id': 'phihag',
381 'upload_date': '20121002',
382 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
383 'categories': ['Science & Technology'],
384 'tags': ['youtube-dl'],
386 'dislike_count': int,
392 'url': 'http://www.youtube.com/watch?v=UxxajLWwzqY',
393 'note': 'Test generic use_cipher_signature video (#897)',
397 'upload_date': '20120506',
398 'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
399 'alt_title': 'I Love It (feat. Charli XCX)',
400 'description': 'md5:782e8651347686cba06e58f71ab51773',
401 'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
402 'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
403 'iconic ep', 'iconic', 'love', 'it'],
404 'uploader': 'Icona Pop',
405 'uploader_id': 'IconaPop',
406 'creator': 'Icona Pop',
410 'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ',
411 'note': 'Test VEVO video with age protection (#956)',
415 'upload_date': '20130703',
416 'title': 'Justin Timberlake - Tunnel Vision (Explicit)',
417 'alt_title': 'Tunnel Vision',
418 'description': 'md5:64249768eec3bc4276236606ea996373',
419 'uploader': 'justintimberlakeVEVO',
420 'uploader_id': 'justintimberlakeVEVO',
421 'creator': 'Justin Timberlake',
426 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
427 'note': 'Embed-only video (#1746)',
431 'upload_date': '20120608',
432 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
433 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
434 'uploader': 'SET India',
435 'uploader_id': 'setindia',
440 'url': 'http://www.youtube.com/watch?v=BaW_jenozKcj&v=UxxajLWwzqY',
441 'note': 'Use the first video ID in the URL',
445 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
446 'uploader': 'Philipp Hagemeister',
447 'uploader_id': 'phihag',
448 'upload_date': '20121002',
449 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
450 'categories': ['Science & Technology'],
451 'tags': ['youtube-dl'],
453 'dislike_count': int,
456 'skip_download': True,
460 'url': 'http://www.youtube.com/watch?v=a9LDPn-MO4I',
461 'note': '256k DASH audio (format 141) via DASH manifest',
465 'upload_date': '20121002',
466 'uploader_id': '8KVIDEO',
468 'uploader': '8KVIDEO',
469 'title': 'UHDTV TEST 8K VIDEO.mp4'
472 'youtube_include_dash_manifest': True,
476 # DASH manifest with encrypted signature
478 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
482 'title': 'Afrojack, Spree Wilson - The Spark ft. Spree Wilson',
483 'description': 'md5:12e7067fa6735a77bdcbb58cb1187d2d',
484 'uploader': 'AfrojackVEVO',
485 'uploader_id': 'AfrojackVEVO',
486 'upload_date': '20131011',
489 'youtube_include_dash_manifest': True,
493 # JS player signature function name containing $
495 'url': 'https://www.youtube.com/watch?v=nfWlot6h_JM',
499 'title': 'Taylor Swift - Shake It Off',
500 'alt_title': 'Shake It Off',
501 'description': 'md5:95f66187cd7c8b2c13eb78e1223b63c3',
502 'uploader': 'TaylorSwiftVEVO',
503 'uploader_id': 'TaylorSwiftVEVO',
504 'upload_date': '20140818',
505 'creator': 'Taylor Swift',
508 'youtube_include_dash_manifest': True,
514 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
518 'upload_date': '20100909',
519 'uploader': 'The Amazing Atheist',
520 'uploader_id': 'TheAmazingAtheist',
521 'title': 'Burning Everyone\'s Koran',
522 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
525 # Normal age-gate video (No vevo, embed allowed)
527 'url': 'http://youtube.com/watch?v=HtVdAasjOgU',
531 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
532 'description': 're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
533 'uploader': 'The Witcher',
534 'uploader_id': 'WitcherGame',
535 'upload_date': '20140605',
539 # Age-gate video with encrypted signature
541 'url': 'http://www.youtube.com/watch?v=6kLq3WMV1nU',
545 'title': 'Dedication To My Ex (Miss That) (Lyric Video)',
546 'description': 'md5:33765bb339e1b47e7e72b5490139bb41',
547 'uploader': 'LloydVEVO',
548 'uploader_id': 'LloydVEVO',
549 'upload_date': '20110629',
553 # video_info is None (https://github.com/rg3/youtube-dl/issues/4421)
555 'url': '__2ABJjxzNo',
559 'upload_date': '20100430',
560 'uploader_id': 'deadmau5',
561 'creator': 'deadmau5',
562 'description': 'md5:12c56784b8032162bb936a5f76d55360',
563 'uploader': 'deadmau5',
564 'title': 'Deadmau5 - Some Chords (HD)',
565 'alt_title': 'Some Chords',
567 'expected_warnings': [
568 'DASH manifest missing',
571 # Olympics (https://github.com/rg3/youtube-dl/issues/4431)
573 'url': 'lqQg6PlCWgI',
577 'upload_date': '20150827',
578 'uploader_id': 'olympic',
579 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
580 'uploader': 'Olympics',
581 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
584 'skip_download': 'requires avconv',
589 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
593 'stretched_ratio': 16 / 9.,
594 'upload_date': '20110310',
595 'uploader_id': 'AllenMeow',
596 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
598 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
601 # url_encoded_fmt_stream_map is empty string
603 'url': 'qEJwOuvDf7I',
607 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
609 'upload_date': '20150404',
610 'uploader_id': 'spbelect',
611 'uploader': 'Наблюдатели Петербурга',
614 'skip_download': 'requires avconv',
617 # Extraction from multiple DASH manifests (https://github.com/rg3/youtube-dl/pull/6097)
619 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
623 'title': 'md5:7b81415841e02ecd4313668cde88737a',
624 'description': 'md5:116377fd2963b81ec4ce64b542173306',
625 'upload_date': '20150625',
626 'uploader_id': 'dorappi2000',
627 'uploader': 'dorappi2000',
628 'formats': 'mincount:33',
631 # DASH manifest with segment_list
633 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
634 'md5': '8ce563a1d667b599d21064e982ab9e31',
638 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
639 'uploader': 'Airtek',
640 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
641 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
642 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
645 'youtube_include_dash_manifest': True,
646 'format': '135', # bestvideo
650 # Multifeed videos (multiple cameras), URL is for Main Camera
651 'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
654 'title': 'teamPGP: Rocket League Noob Stream',
655 'description': 'md5:dc7872fb300e143831327f1bae3af010',
661 'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
662 'description': 'md5:dc7872fb300e143831327f1bae3af010',
663 'upload_date': '20150721',
664 'uploader': 'Beer Games Beer',
665 'uploader_id': 'beergamesbeer',
671 'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
672 'description': 'md5:dc7872fb300e143831327f1bae3af010',
673 'upload_date': '20150721',
674 'uploader': 'Beer Games Beer',
675 'uploader_id': 'beergamesbeer',
681 'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
682 'description': 'md5:dc7872fb300e143831327f1bae3af010',
683 'upload_date': '20150721',
684 'uploader': 'Beer Games Beer',
685 'uploader_id': 'beergamesbeer',
691 'title': 'teamPGP: Rocket League Noob Stream (zim)',
692 'description': 'md5:dc7872fb300e143831327f1bae3af010',
693 'upload_date': '20150721',
694 'uploader': 'Beer Games Beer',
695 'uploader_id': 'beergamesbeer',
699 'skip_download': True,
703 'url': 'http://vid.plus/FlRa-iH7PGw',
704 'only_matching': True,
707 # Title with JS-like syntax "};" (see https://github.com/rg3/youtube-dl/issues/7468)
708 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
712 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
713 'alt_title': 'Dark Walk',
714 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
715 'upload_date': '20151119',
716 'uploader_id': 'IronSoulElf',
717 'uploader': 'IronSoulElf',
718 'creator': 'Todd Haberman, Daniel Law Heath & Aaron Kaplan',
721 'skip_download': True,
725 # Tags with '};' (see https://github.com/rg3/youtube-dl/issues/7468)
726 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
727 'only_matching': True,
730 # Video with yt:stretch=17:0
731 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
735 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
736 'description': 'md5:ee18a25c350637c8faff806845bddee9',
737 'upload_date': '20151107',
738 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
739 'uploader': 'CH GAMER DROID',
742 'skip_download': True,
746 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;v=V36LpHqtcDY',
747 'only_matching': True,
def __init__(self, *args, **kwargs):
    """Initialise the base extractor and start with an empty player cache."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # Maps (player_url, signature cache id) to an extracted signature
    # function; populated by _decrypt_signature.
    self._player_cache = dict()
def report_video_info_webpage_download(self, video_id):
    """Tell the user that the video info webpage is being downloaded."""
    message = '%s: Downloading video info webpage' % video_id
    self.to_screen(message)
def report_information_extraction(self, video_id):
    """Tell the user that video information is being extracted."""
    message = '%s: Extracting video information' % video_id
    self.to_screen(message)
def report_unavailable_format(self, video_id, format):
    """Tell the user that *format* is not available for *video_id*."""
    details = (video_id, format)
    self.to_screen('%s: Format %s not available' % details)
def report_rtmp_download(self):
    """Tell the user that the download will go over the RTMP protocol."""
    notice = 'RTMP download detected'
    self.to_screen(notice)
def _signature_cache_id(self, example_sig):
    """Return a string describing the shape of a signature.

    The signature is split on '.', and the lengths of the resulting parts
    are joined with '.' again (a signature shaped like 'abc.de' would give
    '3.2', assuming compat_str is the text type).
    """
    part_lengths = [compat_str(len(chunk)) for chunk in example_sig.split('.')]
    return '.'.join(part_lengths)
775 def _extract_signature_function(self, video_id, player_url, example_sig):
777 r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|/base)?\.(?P<ext>[a-z]+)$',
780 raise ExtractorError('Cannot identify player %r' % player_url)
781 player_type = id_m.group('ext')
782 player_id = id_m.group('id')
784 # Read from filesystem cache
785 func_id = '%s_%s_%s' % (
786 player_type, player_id, self._signature_cache_id(example_sig))
787 assert os.path.basename(func_id) == func_id
789 cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
790 if cache_spec is not None:
791 return lambda s: ''.join(s[i] for i in cache_spec)
794 'Downloading player %s' % player_url
795 if self._downloader.params.get('verbose') else
796 'Downloading %s player %s' % (player_type, player_id)
798 if player_type == 'js':
799 code = self._download_webpage(
800 player_url, video_id,
802 errnote='Download of %s failed' % player_url)
803 res = self._parse_sig_js(code)
804 elif player_type == 'swf':
805 urlh = self._request_webpage(
806 player_url, video_id,
808 errnote='Download of %s failed' % player_url)
810 res = self._parse_sig_swf(code)
812 assert False, 'Invalid player type %r' % player_type
814 test_string = ''.join(map(compat_chr, range(len(example_sig))))
815 cache_res = res(test_string)
816 cache_spec = [ord(c) for c in cache_res]
818 self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
821 def _print_sig_code(self, func, example_sig):
822 def gen_sig_code(idxs):
823 def _genslice(start, end, step):
824 starts = '' if start == 0 else str(start)
825 ends = (':%d' % (end + step)) if end + step >= 0 else ':'
826 steps = '' if step == 1 else (':%d' % step)
827 return 's[%s%s%s]' % (starts, ends, steps)
830 # Squelch pyflakes warnings - start will be set when step is set
831 start = '(Never used)'
832 for i, prev in zip(idxs[1:], idxs[:-1]):
836 yield _genslice(start, prev, step)
839 if i - prev in [-1, 1]:
848 yield _genslice(start, i, step)
850 test_string = ''.join(map(compat_chr, range(len(example_sig))))
851 cache_res = func(test_string)
852 cache_spec = [ord(c) for c in cache_res]
853 expr_code = ' + '.join(gen_sig_code(cache_spec))
854 signature_id_tuple = '(%s)' % (
855 ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
856 code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
857 ' return %s\n') % (signature_id_tuple, expr_code)
858 self.to_screen('Extracted signature function:\n' + code)
def _parse_sig_js(self, jscode):
    """Build a signature function from the JavaScript player code.

    The function name is the identifier called right after ``.sig||`` in
    *jscode*. That function is extracted with JSInterpreter, and the
    returned callable invokes it with the signature string as its only
    argument.
    """
    sig_func_name = self._search_regex(
        r'\.sig\|\|([a-zA-Z0-9$]+)\(', jscode,
        'Initial JS player signature function name')
    decipher = JSInterpreter(jscode).extract_function(sig_func_name)

    def run_decipher(s):
        # The extracted function takes its arguments as a list.
        return decipher([s])

    return run_decipher
def _parse_sig_swf(self, file_contents):
    """Build a signature function from the Flash (SWF) player contents.

    The 'decipher' function of the 'SignatureDecipher' class is extracted
    with SWFInterpreter, and the returned callable invokes it with the
    signature string as its only argument.
    """
    interpreter = SWFInterpreter(file_contents)
    decipher_class = interpreter.extract_class('SignatureDecipher')
    decipher = interpreter.extract_function(decipher_class, 'decipher')

    def run_decipher(s):
        # The extracted function takes its arguments as a list.
        return decipher([s])

    return run_decipher
876 def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
877 """Turn the encrypted s field into a working signature"""
879 if player_url is None:
880 raise ExtractorError('Cannot decrypt signature without player_url')
882 if player_url.startswith('//'):
883 player_url = 'https:' + player_url
885 player_id = (player_url, self._signature_cache_id(s))
886 if player_id not in self._player_cache:
887 func = self._extract_signature_function(
888 video_id, player_url, s
890 self._player_cache[player_id] = func
891 func = self._player_cache[player_id]
892 if self._downloader.params.get('youtube_print_sig_code'):
893 self._print_sig_code(func, s)
895 except Exception as e:
896 tb = traceback.format_exc()
897 raise ExtractorError(
898 'Signature extraction failed: ' + tb, cause=e)
900 def _get_subtitles(self, video_id, webpage):
902 subs_doc = self._download_xml(
903 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
904 video_id, note=False)
905 except ExtractorError as err:
906 self._downloader.report_warning('unable to download video subtitles: %s' % compat_str(err))
910 for track in subs_doc.findall('track'):
911 lang = track.attrib['lang_code']
912 if lang in sub_lang_list:
915 for ext in ['sbv', 'vtt', 'srt']:
916 params = compat_urllib_parse.urlencode({
920 'name': track.attrib['name'].encode('utf-8'),
923 'url': 'https://www.youtube.com/api/timedtext?' + params,
926 sub_lang_list[lang] = sub_formats
927 if not sub_lang_list:
928 self._downloader.report_warning('video doesn\'t have subtitles')
932 def _get_ytplayer_config(self, video_id, webpage):
934 # User data may contain arbitrary character sequences that may affect
935 # JSON extraction with regex, e.g. when '};' is contained the second
936 # regex won't capture the whole JSON. Yet working around by trying more
937 # concrete regex first keeping in mind proper quoted string handling
938 # to be implemented in future that will replace this workaround (see
939 # https://github.com/rg3/youtube-dl/issues/7468,
940 # https://github.com/rg3/youtube-dl/pull/7599)
941 r';ytplayer\.config\s*=\s*({.+?});ytplayer',
942 r';ytplayer\.config\s*=\s*({.+?});',
944 config = self._search_regex(
945 patterns, webpage, 'ytplayer.config', default=None)
947 return self._parse_json(
948 uppercase_escape(config), video_id, fatal=False)
950 def _get_automatic_captions(self, video_id, webpage):
951 """We need the webpage for getting the captions url, pass it as an
952 argument to speed up the process."""
953 self.to_screen('%s: Looking for automatic captions' % video_id)
954 player_config = self._get_ytplayer_config(video_id, webpage)
955 err_msg = 'Couldn\'t find automatic captions for %s' % video_id
956 if not player_config:
957 self._downloader.report_warning(err_msg)
960 args = player_config['args']
961 caption_url = args['ttsurl']
962 timestamp = args['timestamp']
963 # We get the available subtitles
964 list_params = compat_urllib_parse.urlencode({
969 list_url = caption_url + '&' + list_params
970 caption_list = self._download_xml(list_url, video_id)
971 original_lang_node = caption_list.find('track')
972 if original_lang_node is None:
973 self._downloader.report_warning('Video doesn\'t have automatic captions')
975 original_lang = original_lang_node.attrib['lang_code']
976 caption_kind = original_lang_node.attrib.get('kind', '')
979 for lang_node in caption_list.findall('target'):
980 sub_lang = lang_node.attrib['lang_code']
982 for ext in ['sbv', 'vtt', 'srt']:
983 params = compat_urllib_parse.urlencode({
984 'lang': original_lang,
988 'kind': caption_kind,
991 'url': caption_url + '&' + params,
994 sub_lang_list[sub_lang] = sub_formats
996 # An extractor error can be raised by the download process if there are
997 # no automatic captions but there are subtitles
998 except (KeyError, ExtractorError):
999 self._downloader.report_warning(err_msg)
@classmethod
def extract_id(cls, url):
    """Return the video id that ``cls._VALID_URL`` captures from *url*.

    Raises ExtractorError when *url* does not match ``cls._VALID_URL``.
    """
    mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
    # Only a failed match is an error; a successful one falls through.
    if mobj is None:
        raise ExtractorError('Invalid URL: %s' % url)
    # The id is taken from capture group 2 of _VALID_URL.
    video_id = mobj.group(2)
    return video_id
1010 def _extract_from_m3u8(self, manifest_url, video_id):
1013 def _get_urls(_manifest):
1014 lines = _manifest.split('\n')
1015 urls = filter(lambda l: l and not l.startswith('#'),
1018 manifest = self._download_webpage(manifest_url, video_id, 'Downloading formats manifest')
1019 formats_urls = _get_urls(manifest)
1020 for format_url in formats_urls:
1021 itag = self._search_regex(r'itag/(\d+?)/', format_url, 'itag')
1022 url_map[itag] = format_url
1025 def _extract_annotations(self, video_id):
1026 url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id
1027 return self._download_webpage(url, video_id, note='Searching for annotations.', errnote='Unable to download video annotations.')
1029 def _parse_dash_manifest(
1030 self, video_id, dash_manifest_url, player_url, age_gate, fatal=True):
1031 def decrypt_sig(mobj):
1033 dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
1034 return '/signature/%s' % dec_s
1035 dash_manifest_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, dash_manifest_url)
1036 dash_doc = self._download_xml(
1037 dash_manifest_url, video_id,
1038 note='Downloading DASH manifest',
1039 errnote='Could not download DASH manifest',
1042 if dash_doc is False:
1046 for a in dash_doc.findall('.//{urn:mpeg:DASH:schema:MPD:2011}AdaptationSet'):
1047 mime_type = a.attrib.get('mimeType')
1048 for r in a.findall('{urn:mpeg:DASH:schema:MPD:2011}Representation'):
1049 url_el = r.find('{urn:mpeg:DASH:schema:MPD:2011}BaseURL')
1052 if mime_type == 'text/vtt':
1053 # TODO implement WebVTT downloading
1055 elif mime_type.startswith('audio/') or mime_type.startswith('video/'):
1056 segment_list = r.find('{urn:mpeg:DASH:schema:MPD:2011}SegmentList')
1057 format_id = r.attrib['id']
1058 video_url = url_el.text
1059 filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength'))
1061 'format_id': format_id,
1063 'width': int_or_none(r.attrib.get('width')),
1064 'height': int_or_none(r.attrib.get('height')),
1065 'tbr': int_or_none(r.attrib.get('bandwidth'), 1000),
1066 'asr': int_or_none(r.attrib.get('audioSamplingRate')),
1067 'filesize': filesize,
1068 'fps': int_or_none(r.attrib.get('frameRate')),
1070 if segment_list is not None:
1072 'initialization_url': segment_list.find('{urn:mpeg:DASH:schema:MPD:2011}Initialization').attrib['sourceURL'],
1073 'segment_urls': [segment.attrib.get('media') for segment in segment_list.findall('{urn:mpeg:DASH:schema:MPD:2011}SegmentURL')],
1074 'protocol': 'http_dash_segments',
1077 existing_format = next(
1078 fo for fo in formats
1079 if fo['format_id'] == format_id)
1080 except StopIteration:
1081 full_info = self._formats.get(format_id, {}).copy()
1083 codecs = r.attrib.get('codecs')
1085 if full_info.get('acodec') == 'none' and 'vcodec' not in full_info:
1086 full_info['vcodec'] = codecs
1087 elif full_info.get('vcodec') == 'none' and 'acodec' not in full_info:
1088 full_info['acodec'] = codecs
1089 formats.append(full_info)
1091 existing_format.update(f)
1093 self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
1096 def _real_extract(self, url):
1097 url, smuggled_data = unsmuggle_url(url, {})
1100 'http' if self._downloader.params.get('prefer_insecure', False)
1105 parsed_url = compat_urllib_parse_urlparse(url)
1106 for component in [parsed_url.fragment, parsed_url.query]:
1107 query = compat_parse_qs(component)
1108 if start_time is None and 't' in query:
1109 start_time = parse_duration(query['t'][0])
1110 if start_time is None and 'start' in query:
1111 start_time = parse_duration(query['start'][0])
1112 if end_time is None and 'end' in query:
1113 end_time = parse_duration(query['end'][0])
1115 # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1116 mobj = re.search(self._NEXT_URL_RE, url)
1118 url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
1119 video_id = self.extract_id(url)
1122 url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
1123 video_webpage = self._download_webpage(url, video_id)
1125 # Attempt to extract SWF player URL
1126 mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
1127 if mobj is not None:
1128 player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1134 def add_dash_mpd(video_info):
1135 dash_mpd = video_info.get('dashmpd')
1136 if dash_mpd and dash_mpd[0] not in dash_mpds:
1137 dash_mpds.append(dash_mpd[0])
1140 embed_webpage = None
1142 if re.search(r'player-age-gate-content">', video_webpage) is not None:
1144 # We simulate the access to the video from www.youtube.com/v/{video_id}
1145 # this can be viewed without login into Youtube
1146 url = proto + '://www.youtube.com/embed/%s' % video_id
1147 embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
1148 data = compat_urllib_parse.urlencode({
1149 'video_id': video_id,
1150 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1151 'sts': self._search_regex(
1152 r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
1154 video_info_url = proto + '://www.youtube.com/get_video_info?' + data
1155 video_info_webpage = self._download_webpage(
1156 video_info_url, video_id,
1157 note='Refetching age-gated info webpage',
1158 errnote='unable to download video info webpage')
1159 video_info = compat_parse_qs(video_info_webpage)
1160 add_dash_mpd(video_info)
1164 # Try looking directly into the video webpage
1165 ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
1167 args = ytplayer_config['args']
1168 if args.get('url_encoded_fmt_stream_map'):
1169 # Convert to the same format returned by compat_parse_qs
1170 video_info = dict((k, [v]) for k, v in args.items())
1171 add_dash_mpd(video_info)
1172 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1174 if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
1175 # We also try looking in get_video_info since it may contain different dashmpd
1176 # URL that points to a DASH manifest with possibly different itag set (some itags
1177 # are missing from DASH manifest pointed by webpage's dashmpd, some - from DASH
1178 # manifest pointed by get_video_info's dashmpd).
1179 # The general idea is to take a union of itags of both DASH manifests (for example
1180 # video with such 'manifest behavior' see https://github.com/rg3/youtube-dl/issues/6093)
1181 self.report_video_info_webpage_download(video_id)
1182 for el_type in ['&el=info', '&el=embedded', '&el=detailpage', '&el=vevo', '']:
1184 '%s://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
1185 % (proto, video_id, el_type))
1186 video_info_webpage = self._download_webpage(
1188 video_id, note=False,
1189 errnote='unable to download video info webpage')
1190 get_video_info = compat_parse_qs(video_info_webpage)
1191 if get_video_info.get('use_cipher_signature') != ['True']:
1192 add_dash_mpd(get_video_info)
1194 video_info = get_video_info
1195 if 'token' in get_video_info:
1196 # Different get_video_info requests may report different results, e.g.
1197 # some may report video unavailability, but some may serve it without
1198 # any complaint (see https://github.com/rg3/youtube-dl/issues/7362,
1199 # the original webpage as well as el=info and el=embedded get_video_info
1200 # requests report video unavailability due to geo restriction while
1201 # el=detailpage succeeds and returns valid data). This is probably
1202 # due to YouTube measures against IP ranges of hosting providers.
1203 # Working around by preferring the first succeeded video_info containing
1204 # the token if no such video_info yet was found.
1205 if 'token' not in video_info:
1206 video_info = get_video_info
1208 if 'token' not in video_info:
1209 if 'reason' in video_info:
1210 if 'The uploader has not made this video available in your country.' in video_info['reason']:
1211 regions_allowed = self._html_search_meta('regionsAllowed', video_webpage, default=None)
1213 raise ExtractorError('YouTube said: This video is available in %s only' % (
1214 ', '.join(map(ISO3166Utils.short2full, regions_allowed.split(',')))),
1216 raise ExtractorError(
1217 'YouTube said: %s' % video_info['reason'][0],
1218 expected=True, video_id=video_id)
1220 raise ExtractorError(
1221 '"token" parameter not in video info for unknown reason',
1225 if 'title' in video_info:
1226 video_title = video_info['title'][0]
1228 self._downloader.report_warning('Unable to extract video title')
1232 video_description = get_element_by_id("eow-description", video_webpage)
1233 if video_description:
1234 video_description = re.sub(r'''(?x)
1236 (?:[a-zA-Z-]+="[^"]+"\s+)*?
1238 (?:[a-zA-Z-]+="[^"]+"\s+)*?
1239 class="yt-uix-redirect-link"\s*>
1242 ''', r'\1', video_description)
1243 video_description = clean_html(video_description)
1245 fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
1247 video_description = unescapeHTML(fd_mobj.group(1))
1249 video_description = ''
1251 if 'multifeed_metadata_list' in video_info and not smuggled_data.get('force_singlefeed', False):
1252 if not self._downloader.params.get('noplaylist'):
1255 multifeed_metadata_list = compat_urllib_parse_unquote_plus(video_info['multifeed_metadata_list'][0])
1256 for feed in multifeed_metadata_list.split(','):
1257 feed_data = compat_parse_qs(feed)
1259 '_type': 'url_transparent',
1260 'ie_key': 'Youtube',
1262 '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
1263 {'force_singlefeed': True}),
1264 'title': '%s (%s)' % (video_title, feed_data['title'][0]),
1266 feed_ids.append(feed_data['id'][0])
1268 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
1269 % (', '.join(feed_ids), video_id))
1270 return self.playlist_result(entries, video_id, video_title, video_description)
1271 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
1273 if 'view_count' in video_info:
1274 view_count = int(video_info['view_count'][0])
1278 # Check for "rental" videos
1279 if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
1280 raise ExtractorError('"rental" videos not supported')
1282 # Start extracting information
1283 self.report_information_extraction(video_id)
1286 if 'author' not in video_info:
1287 raise ExtractorError('Unable to extract uploader name')
1288 video_uploader = compat_urllib_parse_unquote_plus(video_info['author'][0])
1291 video_uploader_id = None
1292 mobj = re.search(r'<link itemprop="url" href="http://www.youtube.com/(?:user|channel)/([^"]+)">', video_webpage)
1293 if mobj is not None:
1294 video_uploader_id = mobj.group(1)
1296 self._downloader.report_warning('unable to extract uploader nickname')
1299 # We try first to get a high quality image:
1300 m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
1301 video_webpage, re.DOTALL)
1302 if m_thumb is not None:
1303 video_thumbnail = m_thumb.group(1)
1304 elif 'thumbnail_url' not in video_info:
1305 self._downloader.report_warning('unable to extract video thumbnail')
1306 video_thumbnail = None
1307 else: # don't panic if we can't find it
1308 video_thumbnail = compat_urllib_parse_unquote_plus(video_info['thumbnail_url'][0])
1311 upload_date = self._html_search_meta(
1312 'datePublished', video_webpage, 'upload date', default=None)
1314 upload_date = self._search_regex(
1315 [r'(?s)id="eow-date.*?>(.*?)</span>',
1316 r'id="watch-uploader-info".*?>.*?(?:Published|Uploaded|Streamed live|Started) on (.+?)</strong>'],
1317 video_webpage, 'upload date', default=None)
1319 upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
1320 upload_date = unified_strdate(upload_date)
1322 m_music = re.search(
1323 r'<h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*<ul[^>]*>\s*<li>(?P<title>.+?) by (?P<creator>.+?)(?:\(.+?\))?</li',
1326 video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
1327 video_creator = clean_html(m_music.group('creator'))
1329 video_alt_title = video_creator = None
1331 m_cat_container = self._search_regex(
1332 r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
1333 video_webpage, 'categories', default=None)
1335 category = self._html_search_regex(
1336 r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
1338 video_categories = None if category is None else [category]
1340 video_categories = None
1343 unescapeHTML(m.group('content'))
1344 for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
1346 def _extract_count(count_name):
1347 return str_to_int(self._search_regex(
1348 r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>'
1349 % re.escape(count_name),
1350 video_webpage, count_name, default=None))
1352 like_count = _extract_count('like')
1353 dislike_count = _extract_count('dislike')
1356 video_subtitles = self.extract_subtitles(video_id, video_webpage)
1357 automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
1359 if 'length_seconds' not in video_info:
1360 self._downloader.report_warning('unable to extract video duration')
1361 video_duration = None
1363 video_duration = int(compat_urllib_parse_unquote_plus(video_info['length_seconds'][0]))
1366 video_annotations = None
1367 if self._downloader.params.get('writeannotations', False):
1368 video_annotations = self._extract_annotations(video_id)
1370 def _map_to_format_list(urlmap):
1372 for itag, video_real_url in urlmap.items():
1375 'url': video_real_url,
1376 'player_url': player_url,
1378 if itag in self._formats:
1379 dct.update(self._formats[itag])
1383 if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1384 self.report_rtmp_download()
1386 'format_id': '_rtmp',
1388 'url': video_info['conn'][0],
1389 'player_url': player_url,
1391 elif len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1:
1392 encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
1393 if 'rtmpe%3Dyes' in encoded_url_map:
1394 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
1396 for url_data_str in encoded_url_map.split(','):
1397 url_data = compat_parse_qs(url_data_str)
1398 if 'itag' not in url_data or 'url' not in url_data:
1400 format_id = url_data['itag'][0]
1401 url = url_data['url'][0]
1403 if 'sig' in url_data:
1404 url += '&signature=' + url_data['sig'][0]
1405 elif 's' in url_data:
1406 encrypted_sig = url_data['s'][0]
1407 ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
1409 jsplayer_url_json = self._search_regex(
1411 embed_webpage if age_gate else video_webpage,
1412 'JS player URL (1)', default=None)
1413 if not jsplayer_url_json and not age_gate:
1414 # We need the embed website after all
1415 if embed_webpage is None:
1416 embed_url = proto + '://www.youtube.com/embed/%s' % video_id
1417 embed_webpage = self._download_webpage(
1418 embed_url, video_id, 'Downloading embed webpage')
1419 jsplayer_url_json = self._search_regex(
1420 ASSETS_RE, embed_webpage, 'JS player URL')
1422 player_url = json.loads(jsplayer_url_json)
1423 if player_url is None:
1424 player_url_json = self._search_regex(
1425 r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
1426 video_webpage, 'age gate player URL')
1427 player_url = json.loads(player_url_json)
1429 if self._downloader.params.get('verbose'):
1430 if player_url is None:
1431 player_version = 'unknown'
1432 player_desc = 'unknown'
1434 if player_url.endswith('swf'):
1435 player_version = self._search_regex(
1436 r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
1437 'flash player', fatal=False)
1438 player_desc = 'flash player %s' % player_version
1440 player_version = self._search_regex(
1441 [r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js', r'(?:www|player)-([^/]+)/base\.js'],
1443 'html5 player', fatal=False)
1444 player_desc = 'html5 player %s' % player_version
1446 parts_sizes = self._signature_cache_id(encrypted_sig)
1447 self.to_screen('{%s} signature length %s, %s' %
1448 (format_id, parts_sizes, player_desc))
1450 signature = self._decrypt_signature(
1451 encrypted_sig, video_id, player_url, age_gate)
1452 url += '&signature=' + signature
1453 if 'ratebypass' not in url:
1454 url += '&ratebypass=yes'
1456 # Some itags are not included in DASH manifest thus corresponding formats will
1457 # lack metadata (see https://github.com/rg3/youtube-dl/pull/5993).
1458 # Trying to extract metadata from url_encoded_fmt_stream_map entry.
1459 mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
1460 width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
1462 'format_id': format_id,
1464 'player_url': player_url,
1465 'filesize': int_or_none(url_data.get('clen', [None])[0]),
1466 'tbr': float_or_none(url_data.get('bitrate', [None])[0], 1000),
1469 'fps': int_or_none(url_data.get('fps', [None])[0]),
1470 'format_note': url_data.get('quality_label', [None])[0] or url_data.get('quality', [None])[0],
1472 type_ = url_data.get('type', [None])[0]
1474 type_split = type_.split(';')
1475 kind_ext = type_split[0].split('/')
1476 if len(kind_ext) == 2:
1477 kind, ext = kind_ext
1479 if kind in ('audio', 'video'):
1481 for mobj in re.finditer(
1482 r'(?P<key>[a-zA-Z_-]+)=(?P<quote>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):
1483 if mobj.group('key') == 'codecs':
1484 codecs = mobj.group('val')
1487 codecs = codecs.split(',')
1488 if len(codecs) == 2:
1489 acodec, vcodec = codecs[0], codecs[1]
1491 acodec, vcodec = (codecs[0], 'none') if kind == 'audio' else ('none', codecs[0])
1496 if format_id in self._formats:
1497 dct.update(self._formats[format_id])
1499 elif video_info.get('hlsvp'):
1500 manifest_url = video_info['hlsvp'][0]
1501 url_map = self._extract_from_m3u8(manifest_url, video_id)
1502 formats = _map_to_format_list(url_map)
1503 # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
1504 for a_format in formats:
1505 a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
1507 raise ExtractorError('no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
1509 # Look for the DASH manifest
1510 if self._downloader.params.get('youtube_include_dash_manifest', True):
1511 dash_mpd_fatal = True
1512 for dash_manifest_url in dash_mpds:
1515 for df in self._parse_dash_manifest(
1516 video_id, dash_manifest_url, player_url, age_gate, dash_mpd_fatal):
1517 # Do not overwrite DASH format found in some previous DASH manifest
1518 if df['format_id'] not in dash_formats:
1519 dash_formats[df['format_id']] = df
1520 # Additional DASH manifests may end up in HTTP Error 403 therefore
1521 # allow them to fail without bug report message if we already have
1522 # some DASH manifest succeeded. This is temporary workaround to reduce
1523 # burst of bug reports until we figure out the reason and whether it
1524 # can be fixed at all.
1525 dash_mpd_fatal = False
1526 except (ExtractorError, KeyError) as e:
1527 self.report_warning(
1528 'Skipping DASH manifest: %r' % e, video_id)
1530 # Remove the formats we found through non-DASH, they
1531 # contain less info and it can be wrong, because we use
1532 # fixed values (for example the resolution). See
1533 # https://github.com/rg3/youtube-dl/issues/5774 for an
1535 formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
1536 formats.extend(dash_formats.values())
1538 # Check for malformed aspect ratio
1539 stretched_m = re.search(
1540 r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
1543 w = float(stretched_m.group('w'))
1544 h = float(stretched_m.group('h'))
1545 # yt:stretch may hold invalid ratio data (e.g. for Q39EVAstoRM ratio is 17:0).
1546 # We will only process correct ratios.
1550 if f.get('vcodec') != 'none':
1551 f['stretched_ratio'] = ratio
1553 self._sort_formats(formats)
1557 'uploader': video_uploader,
1558 'uploader_id': video_uploader_id,
1559 'upload_date': upload_date,
1560 'creator': video_creator,
1561 'title': video_title,
1562 'alt_title': video_alt_title,
1563 'thumbnail': video_thumbnail,
1564 'description': video_description,
1565 'categories': video_categories,
1567 'subtitles': video_subtitles,
1568 'automatic_captions': automatic_captions,
1569 'duration': video_duration,
1570 'age_limit': 18 if age_gate else 0,
1571 'annotations': video_annotations,
1572 'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
1573 'view_count': view_count,
1574 'like_count': like_count,
1575 'dislike_count': dislike_count,
1576 'average_rating': float_or_none(video_info.get('avg_rating', [None])[0]),
1579 'start_time': start_time,
1580 'end_time': end_time,
1584 class YoutubePlaylistIE(YoutubeBaseInfoExtractor, YoutubePlaylistBaseInfoExtractor):
1585 IE_DESC = 'YouTube.com playlists'
1586 _VALID_URL = r"""(?x)(?:
1591 (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/videoseries)
1592 \? (?:.*?[&;])*? (?:p|a|list)=
1596 (?:PL|LL|EC|UU|FL|RD|UL)?[0-9A-Za-z-_]{10,}
1597 # Top tracks, they can also include dots
1602 ((?:PL|LL|EC|UU|FL|RD|UL)[0-9A-Za-z-_]{10,})
1604 _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
1605 _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&[^"]*?index=(?P<index>\d+)(?:[^>]+>(?P<title>[^<]+))?'
1606 IE_NAME = 'youtube:playlist'
1608 'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
1610 'title': 'ytdl test PL',
1611 'id': 'PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
1613 'playlist_count': 3,
1615 'url': 'https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
1617 'id': 'PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
1618 'title': 'YDL_Empty_List',
1620 'playlist_count': 0,
1622 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
1623 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
1625 'title': '29C3: Not my department',
1626 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
1628 'playlist_count': 95,
1630 'note': 'issue #673',
1631 'url': 'PLBB231211A4F62143',
1633 'title': '[OLD]Team Fortress 2 (Class-based LP)',
1634 'id': 'PLBB231211A4F62143',
1636 'playlist_mincount': 26,
1638 'note': 'Large playlist',
1639 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
1641 'title': 'Uploads from Cauchemar',
1642 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
1644 'playlist_mincount': 799,
1646 'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
1648 'title': 'YDL_safe_search',
1649 'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
1651 'playlist_count': 2,
1654 'url': 'http://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
1655 'playlist_count': 4,
1658 'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
1661 'note': 'Embedded SWF player',
1662 'url': 'http://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
1663 'playlist_count': 4,
1666 'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ',
1669 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
1670 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
1672 'title': 'Uploads from Interstellar Movie',
1673 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
1675 'playlist_mincout': 21,
1678 def _real_initialize(self):
def _extract_mix(self, playlist_id):
    """Extract a YouTube mix and return it as a playlist result.

    The mixes are generated from a single video: the id of the playlist
    is just 'RD' + video_id, so the watch page for the last 11 characters
    of *playlist_id* is downloaded with the list id attached.
    """
    url = 'https://youtube.com/watch?v=%s&list=%s' % (playlist_id[-11:], playlist_id)
    webpage = self._download_webpage(
        url, playlist_id, 'Downloading Youtube mix')

    def search_title(class_name):
        # Look up the element carrying exactly this class attribute value.
        return get_element_by_attribute('class', class_name, webpage)

    # The first class name that yields an element provides the title.
    title_span = (
        search_title('playlist-title') or
        search_title('title long-title') or
        search_title('title'))
    title = clean_html(title_span)
    # Collect video ids of links that belong to this very list, in page
    # order and without duplicates.
    ids = orderedSet(re.findall(
        r'''(?xs)data-video-username=".*?".*?
            href="/watch\?v=([0-9A-Za-z_-]{11})&[^"]*?list=%s''' % re.escape(playlist_id),
        webpage))
    url_results = self._ids_to_results(ids)

    return self.playlist_result(url_results, playlist_id, title)
1701 def _extract_playlist(self, playlist_id):
1702 url = self._TEMPLATE_URL % playlist_id
1703 page = self._download_webpage(url, playlist_id)
1705 for match in re.findall(r'<div class="yt-alert-message">([^<]+)</div>', page):
1706 match = match.strip()
1707 # Check if the playlist exists or is private
1708 if re.match(r'[^<]*(The|This) playlist (does not exist|is private)[^<]*', match):
1709 raise ExtractorError(
1710 'The playlist doesn\'t exist or is private, use --username or '
1711 '--netrc to access it.',
1713 elif re.match(r'[^<]*Invalid parameters[^<]*', match):
1714 raise ExtractorError(
1715 'Invalid parameters. Maybe URL is incorrect.',
1717 elif re.match(r'[^<]*Choose your language[^<]*', match):
1720 self.report_warning('Youtube gives an alert message: ' + match)
1722 playlist_title = self._html_search_regex(
1723 r'(?s)<h1 class="pl-header-title[^"]*"[^>]*>\s*(.*?)\s*</h1>',
1726 return self.playlist_result(self._entries(page, playlist_id), playlist_id, playlist_title)
def _real_extract(self, url):
    """Extract a playlist (or a single video of it) from *url*.

    When the URL also names a video (``v=`` query parameter) and the
    ``noplaylist`` option is set, only that video is returned. Playlist
    ids starting with 'RD' or 'UL' are handled by ``_extract_mix``, all
    others by ``_extract_playlist``.

    Raises ExtractorError when *url* does not match ``_VALID_URL``.
    """
    # Extract playlist id
    mobj = re.match(self._VALID_URL, url)
    if mobj is None:
        raise ExtractorError('Invalid URL: %s' % url)
    playlist_id = mobj.group(1) or mobj.group(2)

    # Check if it's a video-specific URL
    query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
    if 'v' in query_dict:
        video_id = query_dict['v'][0]
        if self._downloader.params.get('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
            return self.url_result(video_id, 'Youtube', video_id=video_id)
        self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))

    if playlist_id.startswith(('RD', 'UL')):
        # Mixes require a custom extraction process
        return self._extract_mix(playlist_id)

    return self._extract_playlist(playlist_id)
1752 class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
1753 IE_DESC = 'YouTube.com channels'
1754 _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/(?P<id>[0-9A-Za-z_-]+)'
1755 _TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos'
1756 _VIDEO_RE = r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?'
1757 IE_NAME = 'youtube:channel'
1759 'note': 'paginated channel',
1760 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
1761 'playlist_mincount': 91,
1763 'id': 'UUKfVa3S1e4PHvxWcwyMMg8w',
1764 'title': 'Uploads from lex will',
1767 'note': 'Age restricted channel',
1768 # from https://www.youtube.com/user/DeusExOfficial
1769 'url': 'https://www.youtube.com/channel/UCs0ifCMCm1icqRbqhUINa0w',
1770 'playlist_mincount': 64,
1772 'id': 'UUs0ifCMCm1icqRbqhUINa0w',
1773 'title': 'Uploads from Deus Ex',
1777 def _real_extract(self, url):
1778 channel_id = self._match_id(url)
1780 url = self._TEMPLATE_URL % channel_id
1782 # Channel by page listing is restricted to 35 pages of 30 items, i.e. 1050 videos total (see #5778)
1783 # Workaround by extracting as a playlist if managed to obtain channel playlist URL
1784 # otherwise fallback on channel by page extraction
1785 channel_page = self._download_webpage(
1786 url + '?view=57', channel_id,
1787 'Downloading channel page', fatal=False)
1788 if channel_page is False:
1789 channel_playlist_id = False
1791 channel_playlist_id = self._html_search_meta(
1792 'channelId', channel_page, 'channel id', default=None)
1793 if not channel_playlist_id:
1794 channel_playlist_id = self._search_regex(
1795 r'data-(?:channel-external-|yt)id="([^"]+)"',
1796 channel_page, 'channel id', default=None)
1797 if channel_playlist_id and channel_playlist_id.startswith('UC'):
1798 playlist_id = 'UU' + channel_playlist_id[2:]
1799 return self.url_result(
1800 compat_urlparse.urljoin(url, '/playlist?list=%s' % playlist_id), 'YoutubePlaylist')
1802 channel_page = self._download_webpage(url, channel_id, 'Downloading page #1')
1803 autogenerated = re.search(r'''(?x)
1805 channel-header-autogenerated-label|
1806 yt-channel-title-autogenerated
1807 )[^"]*"''', channel_page) is not None
1810 # The videos are contained in a single page
1811 # the ajax pages can't be used, they are empty
1814 video_id, 'Youtube', video_id=video_id,
1815 video_title=video_title)
1816 for video_id, video_title in self.extract_videos_from_page(channel_page)]
1817 return self.playlist_result(entries, channel_id)
1819 return self.playlist_result(self._entries(channel_page, channel_id), channel_id)
1822 class YoutubeUserIE(YoutubeChannelIE):
1823 IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
1824 _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
1825 _TEMPLATE_URL = 'https://www.youtube.com/user/%s/videos'
1826 IE_NAME = 'youtube:user'
1829 'url': 'https://www.youtube.com/user/TheLinuxFoundation',
1830 'playlist_mincount': 320,
1832 'title': 'TheLinuxFoundation',
1835 'url': 'ytuser:phihag',
1836 'only_matching': True,
@classmethod
def suitable(cls, url):
    """Return True only if *url* is for this extractor and no other one.

    Every other module-level object whose name ends in 'IE' is asked
    first; if any of them accepts *url*, this extractor declines.
    """
    # Don't return True if the url can be extracted with another youtube
    # extractor: this regex is too permissive and would match those too.
    other_ies = (
        klass for (name, klass) in globals().items()
        if name.endswith('IE') and klass is not cls)
    if any(ie.suitable(url) for ie in other_ies):
        return False
    return super(YoutubeUserIE, cls).suitable(url)
1850 class YoutubeUserPlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
1851 IE_DESC = 'YouTube.com user playlists'
1852 _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/user/(?P<id>[^/]+)/playlists'
1853 IE_NAME = 'youtube:user:playlists'
1856 'url': 'http://www.youtube.com/user/ThirstForScience/playlists',
1857 'playlist_mincount': 4,
1859 'id': 'ThirstForScience',
1860 'title': 'Thirst for Science',
1863 # with "Load more" button
1864 'url': 'http://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
1865 'playlist_mincount': 70,
1868 'title': 'Игорь Клейнер',
class YoutubeSearchIE(SearchInfoExtractor, YoutubePlaylistIE):
    """Search extractor that pages through YouTube's results listing."""

    IE_DESC = 'YouTube.com searches'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    # Extra query-string parameters merged into every results request.
    _EXTRA_QUERY_ARGS = {}
    _TESTS = []  # override PlaylistIE tests

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query.

        Result pages are requested one after another until a page yields no
        video ids or at least ``n`` results have been collected.

        Returns a playlist result titled ``query`` with at most ``n`` entries.
        Raises ExtractorError when a downloaded page contains the
        "search-message" marker.
        """
        videos = []

        for pagenum in itertools.count(1):
            url_query = {
                'search_query': query.encode('utf-8'),
                'page': pagenum,
                'spf': 'navigate',
            }
            url_query.update(self._EXTRA_QUERY_ARGS)
            result_url = 'https://www.youtube.com/results?' + compat_urllib_parse.urlencode(url_query)
            data = self._download_json(
                result_url, video_id='query "%s"' % query,
                note='Downloading page %s' % pagenum,
                errnote='Unable to download API page')
            html_content = data[1]['body']['content']

            if 'class="search-message' in html_content:
                raise ExtractorError(
                    '[youtube] No video results', expected=True)

            new_videos = self._ids_to_results(orderedSet(re.findall(
                r'href="/watch\?v=(.{11})', html_content)))
            videos += new_videos
            # Stop as soon as n results are available.  Using ">" here would
            # fetch one more page after exactly n results, which is wasted
            # work and fails with "No video results" if that page is empty.
            if not new_videos or len(videos) >= n:
                break

        # n may be float('inf'), which cannot be used as a slice bound, so
        # only slice when there really is a surplus.
        if len(videos) > n:
            videos = videos[:n]
        return self.playlist_result(videos, query)
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Variant of YoutubeSearchIE that adds a date sort parameter."""

    IE_DESC = 'YouTube.com searches, newest videos first'
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    _SEARCH_KEY = 'ytsearchdate'
    # Merged into the query string of every results request.
    _EXTRA_QUERY_ARGS = {
        'search_sort': 'video_date_uploaded',
    }
class YoutubeSearchURLIE(InfoExtractor):
    """Extractor for YouTube results-page URLs (``/results?search_query=...``)."""

    IE_NAME = 'youtube:search_url'
    IE_DESC = 'YouTube.com search URLs'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?search_query=(?P<query>[^&]+)(?:[&]|$)'
    _TESTS = [
        {
            'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
            'playlist_mincount': 5,
            'info_dict': {
                'title': 'youtube-dl test video',
            },
        },
    ]

    def _real_extract(self, url):
        """Return a playlist dict with one URL entry per result heading.

        The playlist title is the decoded ``search_query`` value of ``url``.
        """
        query = compat_urllib_parse_unquote_plus(
            re.match(self._VALID_URL, url).group('query'))

        webpage = self._download_webpage(url, query)
        # Only the first <ol class="item-section"> element is inspected.
        result_code = self._search_regex(
            r'(?s)<ol[^>]+class="item-section"(.*?)</ol>', webpage, 'result HTML')

        entries = []
        heading_re = r'(?s)<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*>(.*?)</h3>'
        for heading in re.findall(heading_re, result_code):
            # A missing title is tolerated, a missing link is not.
            title = self._html_search_regex(
                [r'(?s)title="([^"]+)"', r'>([^<]+)</a>'], heading, 'item title', fatal=False)
            href = self._html_search_regex(
                r'(?s)href="([^"]+)"', heading, 'item URL')
            entries.append({
                '_type': 'url',
                'url': compat_urlparse.urljoin('https://www.youtube.com/', href),
                'title': title,
            })

        return {
            '_type': 'playlist',
            'entries': entries,
            'title': query,
        }
class YoutubeShowIE(YoutubePlaylistsBaseInfoExtractor):
    """Extractor for ``/show/<id>`` pages, handled via their playlists page."""

    IE_NAME = 'youtube:show'
    IE_DESC = 'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://www\.youtube\.com/show/(?P<id>[^?#]*)'
    _TESTS = [
        {
            'url': 'https://www.youtube.com/show/airdisasters',
            'playlist_mincount': 5,
            'info_dict': {
                'id': 'airdisasters',
                'title': 'Air Disasters',
            },
        },
    ]

    def _real_extract(self, url):
        """Delegate to the base class using the show's ``/playlists`` URL."""
        show_id = self._match_id(url)
        playlists_url = 'https://www.youtube.com/show/%s/playlists' % show_id
        return super(YoutubeShowIE, self)._real_extract(playlists_url)
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
    """
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        """Extractor name derived from the subclass's _FEED_NAME."""
        return 'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        self._login()

    def _real_extract(self, url):
        """Collect the video ids of the feed page and its "load more" pages.

        ``url`` itself is not used; the feed address is built from
        _FEED_NAME.  Returns a playlist result titled _PLAYLIST_TITLE.
        """
        page = self._download_webpage(
            'https://www.youtube.com/feed/%s' % self._FEED_NAME, self._PLAYLIST_TITLE)

        # The extraction process is the same as for playlists, but the regex
        # for the video ids doesn't contain an index
        ids = []
        more_widget_html = content_html = page
        for page_num in itertools.count(1):
            matches = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)

            # 'recommended' feed has infinite 'load more' and each new portion spins
            # the same videos in (sometimes) slightly different order, so we'll check
            # for unicity and break when portion has no new videos.
            # This must be a real list: on Python 3 filter() returns a lazy
            # iterator that is always truthy, so the emptiness check below
            # would never trigger and the loop would not terminate.
            new_ids = [
                video_id for video_id in orderedSet(matches)
                if video_id not in ids]
            if not new_ids:
                break

            ids.extend(new_ids)

            mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
            if not mobj:
                break

            more = self._download_json(
                'https://youtube.com/%s' % mobj.group('more'), self._PLAYLIST_TITLE,
                'Downloading page #%s' % page_num,
                transform_source=uppercase_escape)
            content_html = more['content_html']
            more_widget_html = more['load_more_widget_html']

        return self.playlist_result(
            self._ids_to_results(ids), playlist_title=self._PLAYLIST_TITLE)
class YoutubeWatchLaterIE(YoutubePlaylistIE):
    """Extractor for the watch later list, handled as the playlist 'WL'."""

    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    IE_NAME = 'youtube:watchlater'
    _VALID_URL = r'https?://www\.youtube\.com/(?:feed/watch_later|playlist\?list=WL)|:ytwatchlater'

    # override PlaylistIE tests
    _TESTS = []

    def _real_extract(self, url):
        """Extract the fixed 'WL' playlist; ``url`` is not consulted."""
        watch_later_id = 'WL'
        return self._extract_playlist(watch_later_id)
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Extractor that resolves the favourites page to its playlist."""

    IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
    IE_NAME = 'youtube:favorites'
    _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
    _LOGIN_REQUIRED = True

    def _real_extract(self, url):
        """Return a URL result pointing at the favourites playlist.

        The playlist id is taken from the first ``list=`` parameter found in
        the my_favorites page; ``url`` itself is not used.
        """
        favourites_page = self._download_webpage(
            'https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        list_id = self._search_regex(
            r'list=(.+?)["&]', favourites_page, 'favourites playlist id')
        return self.url_result(list_id, 'YoutubePlaylist')
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor configured for the 'recommended' feed."""

    # Values consumed by the feed base class.
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = 'Youtube Recommended videos'

    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?'
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor configured for the 'subscriptions' feed."""

    # Values consumed by the feed base class.
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = 'Youtube Subscriptions'

    IE_DESC = 'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor configured for the 'history' feed."""

    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
    # Raw string: the pattern contains "\.", which is not a valid escape
    # sequence in a normal string literal and triggers a warning on newer
    # Python versions.  The resulting pattern text is unchanged.
    _VALID_URL = r'https?://www\.youtube\.com/feed/history|:ythistory'
    _FEED_NAME = 'history'
    _PLAYLIST_TITLE = 'Youtube History'
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch-all for watch/attribution URLs that lack a video id.

    Matching URLs are never extracted; _real_extract always raises an
    error telling the user to quote the URL.
    """

    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [
        {
            'url': 'http://www.youtube.com/watch?annotation_id=annotation_3951667041',
            'only_matching': True,
        },
        {
            'url': 'http://www.youtube.com/watch?',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?feature=foo',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?hl=en-GB',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?t=2372',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError with a quoting hint."""
        hint = ''.join((
            'Did you forget to quote the URL? Remember that & is a meta ',
            'character in most shells, so you want to put the URL in quotes, ',
            'like youtube-dl ',
            '"http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" ',
            ' or simply youtube-dl BaW_jenozKc .',
        ))
        raise ExtractorError(hint, expected=True)
2129 class YoutubeTruncatedIDIE(InfoExtractor):
2130 IE_NAME = 'youtube:truncated_id'
2131 IE_DESC = False # Do not list
2132 _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'
2135 'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
2136 'only_matching': True,
2139 def _real_extract(self, url):
2140 video_id = self._match_id(url)
2141 raise ExtractorError(
2142 'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url),