3 from __future__ import unicode_literals
13 from .common import InfoExtractor, SearchInfoExtractor
14 from ..jsinterp import JSInterpreter
15 from ..swfinterp import SWFInterpreter
16 from ..compat import (
20 compat_urllib_parse_unquote,
21 compat_urllib_parse_unquote_plus,
22 compat_urllib_parse_urlparse,
23 compat_urllib_request,
32 get_element_by_attribute,
# NOTE(review): this chunk is a mangled extract — original line numbers are
# fused into each line, indentation is stripped, and interior lines are
# elided. Code tokens are kept verbatim; only comments are added.
48 class YoutubeBaseInfoExtractor(InfoExtractor):
49 """Provide base functions for Youtube extractors"""
50 _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
51 _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'
52 _NETRC_MACHINE = 'youtube'
53 # If True it will raise an error if no login info is provided
54 _LOGIN_REQUIRED = False
# Sets a PREF cookie on .youtube.com so pages are served in English
# (hl=en); the extractor's regexes depend on the English markup.
56 def _set_language(self):
58 '.youtube.com', 'PREF', 'f1=50000000&hl=en',
59 # YouTube sets the expire time to about two months
60 expire_time=time.time() + 2 * 30 * 24 * 3600)
# Maps a list of video ids to url_result() entries (interior lines elided).
62 def _ids_to_results(self, ids):
64 self.url_result(vid_id, 'Youtube', video_id=vid_id)
# _login() docstring fragment follows; the enclosing 'def _login' line is
# elided from this view.
69 Attempt to log in to YouTube.
70 True is returned if successful or skipped.
71 False is returned if login failed.
73 If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
75 (username, password) = self._get_login_info()
76 # No authentication to be performed
78 if self._LOGIN_REQUIRED:
79 raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
# Step 1: fetch the login page and extract the GALX CSRF token from it.
82 login_page = self._download_webpage(
83 self._LOGIN_URL, None,
84 note='Downloading login page',
85 errnote='unable to fetch login page', fatal=False)
86 if login_page is False:
89 galx = self._search_regex(r'(?s)<input.+?name="GALX".+?value="(.+?)"',
90 login_page, 'Login GALX parameter')
# Step 2: build and POST the login form (most form fields are elided here).
94 'continue': 'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
99 'PersistentCookie': 'yes',
101 'bgresponse': 'js_disabled',
102 'checkConnection': '',
103 'checkedDomains': 'youtube',
110 'service': 'youtube',
115 login_data = compat_urllib_parse.urlencode(encode_dict(login_form_strs)).encode('ascii')
117 req = compat_urllib_request.Request(self._LOGIN_URL, login_data)
118 login_results = self._download_webpage(
120 note='Logging in', errnote='unable to log in', fatal=False)
121 if login_results is False:
# Application-specific passwords are explicitly rejected.
124 if re.search(r'id="errormsg_0_Passwd"', login_results) is not None:
125 raise ExtractorError('Please use your account password and a two-factor code instead of an application-specific password.', expected=True)
# Step 3: handle the two-factor authentication challenge, if presented.
128 # TODO add SMS and phone call support - these require making a request and then prompting the user
130 if re.search(r'(?i)<form[^>]* id="challenge"', login_results) is not None:
131 tfa_code = self._get_tfa_info('2-step verification code')
134 self._downloader.report_warning(
135 'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
136 '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
# Codes may be entered as 'G-XXXXXX'; strip the prefix before submitting.
139 tfa_code = remove_start(tfa_code, 'G-')
141 tfa_form_strs = self._form_hidden_inputs('challenge', login_results)
143 tfa_form_strs.update({
148 tfa_data = compat_urllib_parse.urlencode(encode_dict(tfa_form_strs)).encode('ascii')
150 tfa_req = compat_urllib_request.Request(self._TWOFACTOR_URL, tfa_data)
151 tfa_results = self._download_webpage(
153 note='Submitting TFA code', errnote='unable to submit tfa', fatal=False)
155 if tfa_results is False:
# A repeated challenge form means the code was rejected.
158 if re.search(r'(?i)<form[^>]* id="challenge"', tfa_results) is not None:
159 self._downloader.report_warning('Two-factor code expired or invalid. Please try again, or use a one-use backup code instead.')
161 if re.search(r'(?i)<form[^>]* id="gaia_loginform"', tfa_results) is not None:
162 self._downloader.report_warning('unable to log in - did the page structure change?')
164 if re.search(r'smsauth-interstitial-reviewsettings', tfa_results) is not None:
165 self._downloader.report_warning('Your Google account has a security notice. Please log in on your web browser, resolve the notice, and try again.')
# Still seeing the login form after POST means bad credentials.
168 if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
169 self._downloader.report_warning('unable to log in: bad username or password')
# Hook run before extraction: set language cookie and log in when possible.
173 def _real_initialize(self):
174 if self._downloader is None:
177 if not self._login():
# NOTE(review): mangled extract — fused line numbers, stripped indentation,
# elided interior lines. Code tokens kept verbatim; comments only.
181 class YoutubePlaylistBaseInfoExtractor(InfoExtractor):
182 # Extract the video ids from the playlist pages
# Generator over (video) url_result entries; follows the AJAX
# "load more" widget page by page until it disappears or comes back empty.
183 def _entries(self, page, playlist_id):
184 more_widget_html = content_html = page
185 for page_num in itertools.count(1):
186 for video_id, video_title in self.extract_videos_from_page(content_html):
187 yield self.url_result(
188 video_id, 'Youtube', video_id=video_id,
189 video_title=video_title)
# Locate the "load more" continuation URL in the widget markup.
191 mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
195 more = self._download_json(
196 'https://youtube.com/%s' % mobj.group('more'), playlist_id,
197 'Downloading page #%s' % page_num,
198 transform_source=uppercase_escape)
199 content_html = more['content_html']
200 if not content_html.strip():
201 # Some webpages show a "Load more" button but they don't
204 more_widget_html = more['load_more_widget_html']
# Scan a playlist page with the subclass-provided _VIDEO_RE and return
# (id, title) pairs, de-duplicated, preferring the first non-empty title.
206 def extract_videos_from_page(self, page):
209 for mobj in re.finditer(self._VIDEO_RE, page):
210 # The link with index 0 is not the first video of the playlist (not sure if still actual)
211 if 'index' in mobj.groupdict() and mobj.group('id') == '0':
213 video_id = mobj.group('id')
214 video_title = unescapeHTML(mobj.group('title'))
216 video_title = video_title.strip()
# Duplicate id: keep position, backfill a missing title if we now have one.
218 idx = ids_in_page.index(video_id)
219 if video_title and not titles_in_page[idx]:
220 titles_in_page[idx] = video_title
222 ids_in_page.append(video_id)
223 titles_in_page.append(video_title)
224 return zip(ids_in_page, titles_in_page)
227 class YoutubeIE(YoutubeBaseInfoExtractor):
228 IE_DESC = 'YouTube.com'
229 _VALID_URL = r"""(?x)^
231 (?:https?://|//) # http(s):// or protocol-independent URL
232 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|
233 (?:www\.)?deturl\.com/www\.youtube\.com/|
234 (?:www\.)?pwnyoutube\.com/|
235 (?:www\.)?yourepeat\.com/|
236 tube\.majestyc\.net/|
237 youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
238 (?:.*?\#/)? # handle anchor (#/) redirect urls
239 (?: # the various things that can precede the ID:
240 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
241 |(?: # or the v= param in all its forms
242 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
243 (?:\?|\#!?) # the params delimiter ? or # or #!
244 (?:.*?&)?? # any other preceding param (like /?s=tuff&v=xxxx)
249 youtu\.be| # just youtu.be/xxxx
250 vid\.plus # or vid.plus/xxxx
252 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
254 )? # all until now is optional -> you can pass the naked ID
255 ([0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
256 (?!.*?&list=) # combined list/video URLs are handled by the playlist IE
257 (?(1).+)? # if we found the ID, everything can follow
259 _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
261 '5': {'ext': 'flv', 'width': 400, 'height': 240},
262 '6': {'ext': 'flv', 'width': 450, 'height': 270},
263 '13': {'ext': '3gp'},
264 '17': {'ext': '3gp', 'width': 176, 'height': 144},
265 '18': {'ext': 'mp4', 'width': 640, 'height': 360},
266 '22': {'ext': 'mp4', 'width': 1280, 'height': 720},
267 '34': {'ext': 'flv', 'width': 640, 'height': 360},
268 '35': {'ext': 'flv', 'width': 854, 'height': 480},
269 '36': {'ext': '3gp', 'width': 320, 'height': 240},
270 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080},
271 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072},
272 '43': {'ext': 'webm', 'width': 640, 'height': 360},
273 '44': {'ext': 'webm', 'width': 854, 'height': 480},
274 '45': {'ext': 'webm', 'width': 1280, 'height': 720},
275 '46': {'ext': 'webm', 'width': 1920, 'height': 1080},
276 '59': {'ext': 'mp4', 'width': 854, 'height': 480},
277 '78': {'ext': 'mp4', 'width': 854, 'height': 480},
281 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'preference': -20},
282 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'preference': -20},
283 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'preference': -20},
284 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'preference': -20},
285 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'preference': -20},
286 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'preference': -20},
287 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'preference': -20},
289 # Apple HTTP Live Streaming
290 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'preference': -10},
291 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'preference': -10},
292 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'preference': -10},
293 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'preference': -10},
294 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'preference': -10},
295 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'preference': -10},
296 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'preference': -10},
299 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
300 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
301 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
302 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
303 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
304 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, # Height can vary (https://github.com/rg3/youtube-dl/issues/4559)
305 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
306 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
307 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'h264'},
308 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'h264'},
309 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'vcodec': 'h264'},
312 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 48, 'preference': -50, 'container': 'm4a_dash'},
313 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 128, 'preference': -50, 'container': 'm4a_dash'},
314 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 256, 'preference': -50, 'container': 'm4a_dash'},
317 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
318 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
319 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
320 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
321 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
322 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
323 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'container': 'webm', 'vcodec': 'vp9'},
324 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
325 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
326 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
327 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
328 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
329 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
330 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
331 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
332 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
333 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'vp9'},
334 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'vp9'},
335 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'vp9'},
336 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'vcodec': 'vp9'},
337 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'vp9'},
340 '171': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 128, 'preference': -50},
341 '172': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 256, 'preference': -50},
343 # Dash webm audio with opus inside
344 '249': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50, 'preference': -50},
345 '250': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70, 'preference': -50},
346 '251': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160, 'preference': -50},
349 '_rtmp': {'protocol': 'rtmp'},
355 'url': 'http://www.youtube.com/watch?v=BaW_jenozKcj&t=1s&end=9',
359 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
360 'uploader': 'Philipp Hagemeister',
361 'uploader_id': 'phihag',
362 'upload_date': '20121002',
363 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
364 'categories': ['Science & Technology'],
365 'tags': ['youtube-dl'],
367 'dislike_count': int,
373 'url': 'http://www.youtube.com/watch?v=UxxajLWwzqY',
374 'note': 'Test generic use_cipher_signature video (#897)',
378 'upload_date': '20120506',
379 'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
380 'description': 'md5:782e8651347686cba06e58f71ab51773',
381 'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
382 'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
383 'iconic ep', 'iconic', 'love', 'it'],
384 'uploader': 'Icona Pop',
385 'uploader_id': 'IconaPop',
389 'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ',
390 'note': 'Test VEVO video with age protection (#956)',
394 'upload_date': '20130703',
395 'title': 'Justin Timberlake - Tunnel Vision (Explicit)',
396 'description': 'md5:64249768eec3bc4276236606ea996373',
397 'uploader': 'justintimberlakeVEVO',
398 'uploader_id': 'justintimberlakeVEVO',
403 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
404 'note': 'Embed-only video (#1746)',
408 'upload_date': '20120608',
409 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
410 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
411 'uploader': 'SET India',
412 'uploader_id': 'setindia'
416 'url': 'http://www.youtube.com/watch?v=BaW_jenozKcj&v=UxxajLWwzqY',
417 'note': 'Use the first video ID in the URL',
421 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
422 'uploader': 'Philipp Hagemeister',
423 'uploader_id': 'phihag',
424 'upload_date': '20121002',
425 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
426 'categories': ['Science & Technology'],
427 'tags': ['youtube-dl'],
429 'dislike_count': int,
432 'skip_download': True,
436 'url': 'http://www.youtube.com/watch?v=a9LDPn-MO4I',
437 'note': '256k DASH audio (format 141) via DASH manifest',
441 'upload_date': '20121002',
442 'uploader_id': '8KVIDEO',
444 'uploader': '8KVIDEO',
445 'title': 'UHDTV TEST 8K VIDEO.mp4'
448 'youtube_include_dash_manifest': True,
452 # DASH manifest with encrypted signature
454 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
458 'title': 'Afrojack, Spree Wilson - The Spark ft. Spree Wilson',
459 'description': 'md5:12e7067fa6735a77bdcbb58cb1187d2d',
460 'uploader': 'AfrojackVEVO',
461 'uploader_id': 'AfrojackVEVO',
462 'upload_date': '20131011',
465 'youtube_include_dash_manifest': True,
469 # JS player signature function name containing $
471 'url': 'https://www.youtube.com/watch?v=nfWlot6h_JM',
475 'title': 'Taylor Swift - Shake It Off',
476 'description': 'md5:95f66187cd7c8b2c13eb78e1223b63c3',
477 'uploader': 'TaylorSwiftVEVO',
478 'uploader_id': 'TaylorSwiftVEVO',
479 'upload_date': '20140818',
482 'youtube_include_dash_manifest': True,
488 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
492 'upload_date': '20100909',
493 'uploader': 'The Amazing Atheist',
494 'uploader_id': 'TheAmazingAtheist',
495 'title': 'Burning Everyone\'s Koran',
496 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
499 # Normal age-gate video (No vevo, embed allowed)
501 'url': 'http://youtube.com/watch?v=HtVdAasjOgU',
505 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
506 'description': 're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
507 'uploader': 'The Witcher',
508 'uploader_id': 'WitcherGame',
509 'upload_date': '20140605',
513 # Age-gate video with encrypted signature
515 'url': 'http://www.youtube.com/watch?v=6kLq3WMV1nU',
519 'title': 'Dedication To My Ex (Miss That) (Lyric Video)',
520 'description': 'md5:33765bb339e1b47e7e72b5490139bb41',
521 'uploader': 'LloydVEVO',
522 'uploader_id': 'LloydVEVO',
523 'upload_date': '20110629',
527 # video_info is None (https://github.com/rg3/youtube-dl/issues/4421)
529 'url': '__2ABJjxzNo',
533 'upload_date': '20100430',
534 'uploader_id': 'deadmau5',
535 'description': 'md5:12c56784b8032162bb936a5f76d55360',
536 'uploader': 'deadmau5',
537 'title': 'Deadmau5 - Some Chords (HD)',
539 'expected_warnings': [
540 'DASH manifest missing',
543 # Olympics (https://github.com/rg3/youtube-dl/issues/4431)
545 'url': 'lqQg6PlCWgI',
549 'upload_date': '20120724',
550 'uploader_id': 'olympic',
551 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
552 'uploader': 'Olympics',
553 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
556 'skip_download': 'requires avconv',
561 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
565 'stretched_ratio': 16 / 9.,
566 'upload_date': '20110310',
567 'uploader_id': 'AllenMeow',
568 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
570 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
573 # url_encoded_fmt_stream_map is empty string
575 'url': 'qEJwOuvDf7I',
579 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
581 'upload_date': '20150404',
582 'uploader_id': 'spbelect',
583 'uploader': 'Наблюдатели Петербурга',
586 'skip_download': 'requires avconv',
589 # Extraction from multiple DASH manifests (https://github.com/rg3/youtube-dl/pull/6097)
591 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
595 'title': 'md5:7b81415841e02ecd4313668cde88737a',
596 'description': 'md5:116377fd2963b81ec4ce64b542173306',
597 'upload_date': '20150625',
598 'uploader_id': 'dorappi2000',
599 'uploader': 'dorappi2000',
600 'formats': 'mincount:33',
603 # DASH manifest with segment_list
605 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
606 'md5': '8ce563a1d667b599d21064e982ab9e31',
610 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
611 'uploader': 'Airtek',
612 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
613 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
614 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
617 'youtube_include_dash_manifest': True,
618 'format': '135', # bestvideo
622 # Multifeed videos (multiple cameras), URL is for Main Camera
623 'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
626 'title': 'teamPGP: Rocket League Noob Stream',
627 'description': 'md5:dc7872fb300e143831327f1bae3af010',
633 'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
634 'description': 'md5:dc7872fb300e143831327f1bae3af010',
635 'upload_date': '20150721',
636 'uploader': 'Beer Games Beer',
637 'uploader_id': 'beergamesbeer',
643 'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
644 'description': 'md5:dc7872fb300e143831327f1bae3af010',
645 'upload_date': '20150721',
646 'uploader': 'Beer Games Beer',
647 'uploader_id': 'beergamesbeer',
653 'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
654 'description': 'md5:dc7872fb300e143831327f1bae3af010',
655 'upload_date': '20150721',
656 'uploader': 'Beer Games Beer',
657 'uploader_id': 'beergamesbeer',
663 'title': 'teamPGP: Rocket League Noob Stream (zim)',
664 'description': 'md5:dc7872fb300e143831327f1bae3af010',
665 'upload_date': '20150721',
666 'uploader': 'Beer Games Beer',
667 'uploader_id': 'beergamesbeer',
671 'skip_download': True,
675 'url': 'http://vid.plus/FlRa-iH7PGw',
676 'only_matching': True,
679 # Title with JS-like syntax "};"
680 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
684 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
685 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
686 'upload_date': '20151119',
687 'uploader_id': 'IronSoulElf',
688 'uploader': 'IronSoulElf',
691 'skip_download': True,
def __init__(self, *args, **kwargs):
    """Initialize like the base extractor and add a per-player cache.

    ``_player_cache`` maps (player_url, signature shape) keys to the
    signature-deciphering callables extracted from that player.
    """
    super(YoutubeIE, self).__init__(*args, **kwargs)
    self._player_cache = {}
def report_video_info_webpage_download(self, video_id):
    """Log that the video info webpage is being downloaded."""
    message = '%s: Downloading video info webpage' % video_id
    self.to_screen(message)
def report_information_extraction(self, video_id):
    """Log that metadata extraction for *video_id* has started."""
    message = '%s: Extracting video information' % video_id
    self.to_screen(message)
def report_unavailable_format(self, video_id, format):
    """Report that the requested format is not available for this video.

    (Fixed docstring: the previous one — "Report extracted video URL." —
    was copy-pasted from another reporter and described the wrong thing.)
    """
    self.to_screen('%s: Format %s not available' % (video_id, format))
def report_rtmp_download(self):
    """Announce that the download will go over the RTMP protocol."""
    self.to_screen('RTMP download detected')
def _signature_cache_id(self, example_sig):
    """Return the 'shape' of a signature: the dot-joined lengths of its
    '.'-separated chunks (e.g. ``'abc.de'`` -> ``'3.2'``)."""
    chunk_lengths = [compat_str(len(chunk)) for chunk in example_sig.split('.')]
    return '.'.join(chunk_lengths)
# NOTE(review): mangled extract — fused line numbers, stripped indentation,
# elided interior lines (e.g. the re.match call feeding id_m). Verbatim code.
# Derives a signature-deciphering callable for the given player, with a
# filesystem cache keyed by player type/id and signature shape.
720 def _extract_signature_function(self, video_id, player_url, example_sig):
# Pattern pulls the player id and extension ('js' or 'swf') from the URL.
722 r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|/base)?\.(?P<ext>[a-z]+)$',
725 raise ExtractorError('Cannot identify player %r' % player_url)
726 player_type = id_m.group('ext')
727 player_id = id_m.group('id')
729 # Read from filesystem cache
730 func_id = '%s_%s_%s' % (
731 player_type, player_id, self._signature_cache_id(example_sig))
# func_id is used as a cache filename; guard against path separators.
732 assert os.path.basename(func_id) == func_id
734 cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
735 if cache_spec is not None:
# A cached spec is just a permutation: index list applied to the signature.
736 return lambda s: ''.join(s[i] for i in cache_spec)
# Download note: terse in verbose mode (full URL), descriptive otherwise.
739 'Downloading player %s' % player_url
740 if self._downloader.params.get('verbose') else
741 'Downloading %s player %s' % (player_type, player_id)
# Dispatch on player type: HTML5 JS player vs legacy Flash player.
743 if player_type == 'js':
744 code = self._download_webpage(
745 player_url, video_id,
747 errnote='Download of %s failed' % player_url)
748 res = self._parse_sig_js(code)
749 elif player_type == 'swf':
750 urlh = self._request_webpage(
751 player_url, video_id,
753 errnote='Download of %s failed' % player_url)
755 res = self._parse_sig_swf(code)
757 assert False, 'Invalid player type %r' % player_type
# Probe the function with a known string to learn the index permutation,
# then persist it so future runs can skip the player download.
759 test_string = ''.join(map(compat_chr, range(len(example_sig))))
760 cache_res = res(test_string)
761 cache_spec = [ord(c) for c in cache_res]
763 self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
# NOTE(review): mangled extract — fused line numbers, stripped indentation,
# elided interior lines. Verbatim code; comments only.
# Debug aid: prints Python source equivalent to the deciphering function,
# expressed as slices of the input, so it can be hard-coded if desired.
766 def _print_sig_code(self, func, example_sig):
767 def gen_sig_code(idxs):
# Render a run of evenly-stepped indices as a single slice expression.
768 def _genslice(start, end, step):
769 starts = '' if start == 0 else str(start)
770 ends = (':%d' % (end + step)) if end + step >= 0 else ':'
771 steps = '' if step == 1 else (':%d' % step)
772 return 's[%s%s%s]' % (starts, ends, steps)
775 # Quelch pyflakes warnings - start will be set when step is set
776 start = '(Never used)'
# Walk consecutive index pairs, emitting slices for arithmetic runs.
777 for i, prev in zip(idxs[1:], idxs[:-1]):
781 yield _genslice(start, prev, step)
784 if i - prev in [-1, 1]:
793 yield _genslice(start, i, step)
# Probe func to recover the permutation it applies to a signature string.
795 test_string = ''.join(map(compat_chr, range(len(example_sig))))
796 cache_res = func(test_string)
797 cache_spec = [ord(c) for c in cache_res]
798 expr_code = ' + '.join(gen_sig_code(cache_spec))
# Guard the generated code on the signature shape it was derived from.
799 signature_id_tuple = '(%s)' % (
800 ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
801 code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
802 ' return %s\n') % (signature_id_tuple, expr_code)
803 self.to_screen('Extracted signature function:\n' + code)
def _parse_sig_js(self, jscode):
    """Find the signature-decipher routine inside the HTML5 player JS.

    Returns a callable mapping an encrypted signature string to the
    deciphered signature.
    """
    funcname = self._search_regex(
        r'\.sig\|\|([a-zA-Z0-9$]+)\(', jscode,
        'Initial JS player signature function name')

    decipher = JSInterpreter(jscode).extract_function(funcname)

    def apply_sig(sig):
        return decipher([sig])

    return apply_sig
def _parse_sig_swf(self, file_contents):
    """Find the signature-decipher routine inside the Flash player binary.

    Returns a callable mapping an encrypted signature string to the
    deciphered signature.
    """
    interpreter = SWFInterpreter(file_contents)
    decipher_class = interpreter.extract_class('SignatureDecipher')
    decipher = interpreter.extract_function(decipher_class, 'decipher')

    def apply_sig(sig):
        return decipher([sig])

    return apply_sig
# NOTE(review): mangled extract — fused line numbers, stripped indentation,
# elided interior lines (the opening try: and a closing paren are missing
# from this view). Verbatim code; comments only.
821 def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
822 """Turn the encrypted s field into a working signature"""
824 if player_url is None:
825 raise ExtractorError('Cannot decrypt signature without player_url')
# Normalise protocol-relative player URLs to https.
827 if player_url.startswith('//'):
828 player_url = 'https:' + player_url
# Cache the extracted function per (player URL, signature shape) so the
# player is downloaded and parsed at most once per shape.
830 player_id = (player_url, self._signature_cache_id(s))
831 if player_id not in self._player_cache:
832 func = self._extract_signature_function(
833 video_id, player_url, s
835 self._player_cache[player_id] = func
836 func = self._player_cache[player_id]
# Optional debug output of the equivalent Python code (--youtube-print-sig-code).
837 if self._downloader.params.get('youtube_print_sig_code'):
838 self._print_sig_code(func, s)
# Any failure during extraction is wrapped with its traceback for reporting.
840 except Exception as e:
841 tb = traceback.format_exc()
842 raise ExtractorError(
843 'Signature extraction failed: ' + tb, cause=e)
# NOTE(review): mangled extract — fused line numbers, stripped indentation,
# elided interior lines. Verbatim code; comments only.
# Fetches the list of author-provided subtitle tracks via the legacy
# timedtext API; builds a language -> subtitle-format-list mapping.
845 def _get_subtitles(self, video_id, webpage):
847 subs_doc = self._download_xml(
848 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
849 video_id, note=False)
# Subtitles are best-effort: on failure warn and bail out, don't raise.
850 except ExtractorError as err:
851 self._downloader.report_warning('unable to download video subtitles: %s' % compat_str(err))
855 for track in subs_doc.findall('track'):
856 lang = track.attrib['lang_code']
857 if lang in sub_lang_list:
# One download URL per supported subtitle container.
860 for ext in ['sbv', 'vtt', 'srt']:
861 params = compat_urllib_parse.urlencode({
865 'name': track.attrib['name'].encode('utf-8'),
868 'url': 'https://www.youtube.com/api/timedtext?' + params,
871 sub_lang_list[lang] = sub_formats
872 if not sub_lang_list:
873 self._downloader.report_warning('video doesn\'t have subtitles')
# NOTE(review): mangled extract — the lines opening and closing the
# patterns tuple are elided from this view. Verbatim code; comments only.
# Pulls the ytplayer.config JSON blob out of a watch page; returns the
# parsed dict, or None when no config is present.
877 def _get_ytplayer_config(self, webpage):
# The ';ytplayer'-anchored pattern is tried before the looser fallback.
879 r';ytplayer\.config\s*=\s*({.*?});ytplayer',
880 r';ytplayer\.config\s*=\s*({.*?});',
882 config = self._search_regex(patterns, webpage, 'ytconfig.player', default=None)
883 if config is not None:
# uppercase_escape fixes \U-style escapes before JSON parsing.
884 return json.loads(uppercase_escape(config))
# NOTE(review): mangled extract — fused line numbers, stripped indentation,
# elided interior lines. Verbatim code; comments only.
886 def _get_automatic_captions(self, video_id, webpage):
887 """We need the webpage for getting the captions url, pass it as an
888 argument to speed up the process."""
889 self.to_screen('%s: Looking for automatic captions' % video_id)
890 player_config = self._get_ytplayer_config(webpage)
891 err_msg = 'Couldn\'t find automatic captions for %s' % video_id
892 if player_config is None:
893 self._downloader.report_warning(err_msg)
# The player config supplies the timedtext base URL and a timestamp token.
896 args = player_config['args']
897 caption_url = args['ttsurl']
898 timestamp = args['timestamp']
899 # We get the available subtitles
900 list_params = compat_urllib_parse.urlencode({
905 list_url = caption_url + '&' + list_params
906 caption_list = self._download_xml(list_url, video_id)
# The first <track> is the original spoken language the ASR ran on.
907 original_lang_node = caption_list.find('track')
908 if original_lang_node is None:
909 self._downloader.report_warning('Video doesn\'t have automatic captions')
911 original_lang = original_lang_node.attrib['lang_code']
912 caption_kind = original_lang_node.attrib.get('kind', '')
# Each <target> is a language the captions can be auto-translated into.
915 for lang_node in caption_list.findall('target'):
916 sub_lang = lang_node.attrib['lang_code']
918 for ext in ['sbv', 'vtt', 'srt']:
919 params = compat_urllib_parse.urlencode({
920 'lang': original_lang,
924 'kind': caption_kind,
927 'url': caption_url + '&' + params,
930 sub_lang_list[sub_lang] = sub_formats
932 # An extractor error can be raise by the download process if there are
933 # no automatic captions but there are subtitles
934 except (KeyError, ExtractorError):
935 self._downloader.report_warning(err_msg)
# NOTE(review): mangled extract — the @classmethod decorator, the None
# check and the return line are elided from this view. Verbatim code.
# Extracts the 11-character video id from any URL form _VALID_URL accepts.
939 def extract_id(cls, url):
940 mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
942 raise ExtractorError('Invalid URL: %s' % url)
# Group 2 is the ([0-9A-Za-z_-]{11}) video-id capture in _VALID_URL.
943 video_id = mobj.group(2)
# NOTE(review): mangled extract — fused line numbers, stripped indentation,
# elided interior lines (e.g. url_map initialisation, return). Verbatim code.
# Builds an itag -> URL mapping from an HLS master playlist.
946 def _extract_from_m3u8(self, manifest_url, video_id):
949 def _get_urls(_manifest):
950 lines = _manifest.split('\n')
# Keep only non-empty playlist lines that are not '#' directives.
951 urls = filter(lambda l: l and not l.startswith('#'),
954 manifest = self._download_webpage(manifest_url, video_id, 'Downloading formats manifest')
955 formats_urls = _get_urls(manifest)
956 for format_url in formats_urls:
# The itag (format id) is embedded in the variant URL path.
957 itag = self._search_regex(r'itag/(\d+?)/', format_url, 'itag')
958 url_map[itag] = format_url
def _extract_annotations(self, video_id):
    """Download the raw in-video annotations document for *video_id*."""
    annotations_url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id
    return self._download_webpage(
        annotations_url, video_id,
        note='Searching for annotations.',
        errnote='Unable to download video annotations.')
# NOTE(review): mangled extract — fused line numbers, stripped indentation,
# elided interior lines. Verbatim code; comments only.
# Downloads and parses a DASH MPD, returning youtube-dl format dicts.
965 def _parse_dash_manifest(
966 self, video_id, dash_manifest_url, player_url, age_gate, fatal=True):
# The manifest URL may carry an encrypted /s/<sig> segment; decipher it
# in place with the player's signature function.
967 def decrypt_sig(mobj):
969 dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
970 return '/signature/%s' % dec_s
971 dash_manifest_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, dash_manifest_url)
972 dash_doc = self._download_xml(
973 dash_manifest_url, video_id,
974 note='Downloading DASH manifest',
975 errnote='Could not download DASH manifest',
978 if dash_doc is False:
# Walk every AdaptationSet/Representation pair in the MPD.
982 for a in dash_doc.findall('.//{urn:mpeg:DASH:schema:MPD:2011}AdaptationSet'):
983 mime_type = a.attrib.get('mimeType')
984 for r in a.findall('{urn:mpeg:DASH:schema:MPD:2011}Representation'):
985 url_el = r.find('{urn:mpeg:DASH:schema:MPD:2011}BaseURL')
988 if mime_type == 'text/vtt':
989 # TODO implement WebVTT downloading
991 elif mime_type.startswith('audio/') or mime_type.startswith('video/'):
992 segment_list = r.find('{urn:mpeg:DASH:schema:MPD:2011}SegmentList')
993 format_id = r.attrib['id']
994 video_url = url_el.text
# File size comes from a YouTube-specific BaseURL attribute.
995 filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength'))
997 'format_id': format_id,
999 'width': int_or_none(r.attrib.get('width')),
1000 'height': int_or_none(r.attrib.get('height')),
# MPD bandwidth is in bit/s; scale to kbit/s for tbr.
1001 'tbr': int_or_none(r.attrib.get('bandwidth'), 1000),
1002 'asr': int_or_none(r.attrib.get('audioSamplingRate')),
1003 'filesize': filesize,
1004 'fps': int_or_none(r.attrib.get('frameRate')),
# Segmented representations switch to the http_dash_segments protocol.
1006 if segment_list is not None:
1008 'initialization_url': segment_list.find('{urn:mpeg:DASH:schema:MPD:2011}Initialization').attrib['sourceURL'],
1009 'segment_urls': [segment.attrib.get('media') for segment in segment_list.findall('{urn:mpeg:DASH:schema:MPD:2011}SegmentURL')],
1010 'protocol': 'http_dash_segments',
# Merge with an already-known format of the same id, else create a new
# entry seeded from the static _formats table.
1013 existing_format = next(
1014 fo for fo in formats
1015 if fo['format_id'] == format_id)
1016 except StopIteration:
1017 full_info = self._formats.get(format_id, {}).copy()
# The codecs attribute belongs to whichever stream the format lacks.
1019 codecs = r.attrib.get('codecs')
1021 if full_info.get('acodec') == 'none' and 'vcodec' not in full_info:
1022 full_info['vcodec'] = codecs
1023 elif full_info.get('vcodec') == 'none' and 'acodec' not in full_info:
1024 full_info['acodec'] = codecs
1025 formats.append(full_info)
1027 existing_format.update(f)
1029 self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
def _real_extract(self, url):
    """Extract a single YouTube video (or a multifeed playlist) from *url*."""
    url, smuggled_data = unsmuggle_url(url, {})

        'http' if self._downloader.params.get('prefer_insecure', False)

    # Pull optional playback offsets ('t', 'start', 'end') out of both the
    # fragment and the query string.
    parsed_url = compat_urllib_parse_urlparse(url)
    for component in [parsed_url.fragment, parsed_url.query]:
        query = compat_parse_qs(component)
        if start_time is None and 't' in query:
            start_time = parse_duration(query['t'][0])
        if start_time is None and 'start' in query:
            start_time = parse_duration(query['start'][0])
        if end_time is None and 'end' in query:
            end_time = parse_duration(query['end'][0])

    # Extract original video URL from URL with redirection, like age verification, using next_url parameter
    mobj = re.search(self._NEXT_URL_RE, url)
        url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
    video_id = self.extract_id(url)

    # Get video webpage
    url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
    video_webpage = self._download_webpage(url, video_id)

    # Attempt to extract SWF player URL
    mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
    if mobj is not None:
        player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))

    # Collect every distinct DASH manifest URL seen along the way
    def add_dash_mpd(video_info):
        dash_mpd = video_info.get('dashmpd')
        if dash_mpd and dash_mpd[0] not in dash_mpds:
            dash_mpds.append(dash_mpd[0])

    embed_webpage = None
    if re.search(r'player-age-gate-content">', video_webpage) is not None:
        # We simulate the access to the video from www.youtube.com/v/{video_id}
        # this can be viewed without login into Youtube
        url = proto + '://www.youtube.com/embed/%s' % video_id
        embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
        data = compat_urllib_parse.urlencode({
            'video_id': video_id,
            'eurl': 'https://youtube.googleapis.com/v/' + video_id,
            'sts': self._search_regex(
                r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
        video_info_url = proto + '://www.youtube.com/get_video_info?' + data
        video_info_webpage = self._download_webpage(
            video_info_url, video_id,
            note='Refetching age-gated info webpage',
            errnote='unable to download video info webpage')
        video_info = compat_parse_qs(video_info_webpage)
        add_dash_mpd(video_info)
        # Try looking directly into the video webpage
        ytplayer_config = self._get_ytplayer_config(video_webpage)
        if ytplayer_config is not None:
            args = ytplayer_config['args']
            if args.get('url_encoded_fmt_stream_map'):
                # Convert to the same format returned by compat_parse_qs
                video_info = dict((k, [v]) for k, v in args.items())
                add_dash_mpd(video_info)
            if args.get('livestream') == '1' or args.get('live_playback') == 1:
        if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
            # We also try looking in get_video_info since it may contain different dashmpd
            # URL that points to a DASH manifest with possibly different itag set (some itags
            # are missing from DASH manifest pointed by webpage's dashmpd, some - from DASH
            # manifest pointed by get_video_info's dashmpd).
            # The general idea is to take a union of itags of both DASH manifests (for example
            # video with such 'manifest behavior' see https://github.com/rg3/youtube-dl/issues/6093)
            self.report_video_info_webpage_download(video_id)
            for el_type in ['&el=info', '&el=embedded', '&el=detailpage', '&el=vevo', '']:
                    '%s://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
                    % (proto, video_id, el_type))
                video_info_webpage = self._download_webpage(
                    video_id, note=False,
                    errnote='unable to download video info webpage')
                get_video_info = compat_parse_qs(video_info_webpage)
                if get_video_info.get('use_cipher_signature') != ['True']:
                    add_dash_mpd(get_video_info)
                    video_info = get_video_info
                if 'token' in get_video_info:
                    # Different get_video_info requests may report different results, e.g.
                    # some may report video unavailability, but some may serve it without
                    # any complaint (see https://github.com/rg3/youtube-dl/issues/7362,
                    # the original webpage as well as el=info and el=embedded get_video_info
                    # requests report video unavailability due to geo restriction while
                    # el=detailpage succeeds and returns valid data). This is probably
                    # due to YouTube measures against IP ranges of hosting providers.
                    # Working around by preferring the first succeeded video_info containing
                    # the token if no such video_info yet was found.
                    if 'token' not in video_info:
                        video_info = get_video_info

    # A missing 'token' means the video is unavailable; surface the reason
    if 'token' not in video_info:
        if 'reason' in video_info:
            if 'The uploader has not made this video available in your country.' in video_info['reason']:
                regions_allowed = self._html_search_meta('regionsAllowed', video_webpage, default=None)
                    raise ExtractorError('YouTube said: This video is available in %s only' % (
                        ', '.join(map(ISO3166Utils.short2full, regions_allowed.split(',')))),
            raise ExtractorError(
                'YouTube said: %s' % video_info['reason'][0],
                expected=True, video_id=video_id)
            raise ExtractorError(
                '"token" parameter not in video info for unknown reason',

    # title
    if 'title' in video_info:
        video_title = video_info['title'][0]
        self._downloader.report_warning('Unable to extract video title')

    # description: strip YouTube's redirect-link wrappers, keeping the title
    video_description = get_element_by_id("eow-description", video_webpage)
    if video_description:
        video_description = re.sub(r'''(?x)
                (?:[a-zA-Z-]+="[^"]+"\s+)*?
                (?:[a-zA-Z-]+="[^"]+"\s+)*?
                class="yt-uix-redirect-link"\s*>
        ''', r'\1', video_description)
        video_description = clean_html(video_description)
        fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
            video_description = unescapeHTML(fd_mobj.group(1))
            video_description = ''

    # Multi-camera/multi-feed events expand into one entry per feed unless
    # --no-playlist (or smuggled force_singlefeed) is in effect.
    if 'multifeed_metadata_list' in video_info and not smuggled_data.get('force_singlefeed', False):
        if not self._downloader.params.get('noplaylist'):
            multifeed_metadata_list = compat_urllib_parse_unquote_plus(video_info['multifeed_metadata_list'][0])
            for feed in multifeed_metadata_list.split(','):
                feed_data = compat_parse_qs(feed)
                    '_type': 'url_transparent',
                    'ie_key': 'Youtube',
                        '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
                        {'force_singlefeed': True}),
                    'title': '%s (%s)' % (video_title, feed_data['title'][0]),
                feed_ids.append(feed_data['id'][0])
                'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
                % (', '.join(feed_ids), video_id))
            return self.playlist_result(entries, video_id, video_title, video_description)
        self.to_screen('Downloading just video %s because of --no-playlist' % video_id)

    if 'view_count' in video_info:
        view_count = int(video_info['view_count'][0])

    # Check for "rental" videos
    if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
        raise ExtractorError('"rental" videos not supported')

    # Start extracting information
    self.report_information_extraction(video_id)

    # uploader
    if 'author' not in video_info:
        raise ExtractorError('Unable to extract uploader name')
    video_uploader = compat_urllib_parse_unquote_plus(video_info['author'][0])

    # uploader_id
    video_uploader_id = None
    mobj = re.search(r'<link itemprop="url" href="http://www.youtube.com/(?:user|channel)/([^"]+)">', video_webpage)
    if mobj is not None:
        video_uploader_id = mobj.group(1)
        self._downloader.report_warning('unable to extract uploader nickname')

    # thumbnail image
    # We try first to get a high quality image:
    m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
                        video_webpage, re.DOTALL)
    if m_thumb is not None:
        video_thumbnail = m_thumb.group(1)
    elif 'thumbnail_url' not in video_info:
        self._downloader.report_warning('unable to extract video thumbnail')
        video_thumbnail = None
    else: # don't panic if we can't find it
        video_thumbnail = compat_urllib_parse_unquote_plus(video_info['thumbnail_url'][0])

    # upload date
    upload_date = self._html_search_meta(
        'datePublished', video_webpage, 'upload date', default=None)
        upload_date = self._search_regex(
            [r'(?s)id="eow-date.*?>(.*?)</span>',
             r'id="watch-uploader-info".*?>.*?(?:Published|Uploaded|Streamed live|Started) on (.+?)</strong>'],
            video_webpage, 'upload date', default=None)
        upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
    upload_date = unified_strdate(upload_date)

    # categories
    m_cat_container = self._search_regex(
        r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
        video_webpage, 'categories', default=None)
        category = self._html_search_regex(
            r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
        video_categories = None if category is None else [category]
        video_categories = None

        unescapeHTML(m.group('content'))
        for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]

    def _extract_count(count_name):
        # Like/dislike counters are rendered as comma-grouped integers
        return str_to_int(self._search_regex(
            r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>'
            % re.escape(count_name),
            video_webpage, count_name, default=None))

    like_count = _extract_count('like')
    dislike_count = _extract_count('dislike')

    # subtitles
    video_subtitles = self.extract_subtitles(video_id, video_webpage)
    automatic_captions = self.extract_automatic_captions(video_id, video_webpage)

    # duration
    if 'length_seconds' not in video_info:
        self._downloader.report_warning('unable to extract video duration')
        video_duration = None
        video_duration = int(compat_urllib_parse_unquote_plus(video_info['length_seconds'][0]))

    # annotations
    video_annotations = None
    if self._downloader.params.get('writeannotations', False):
        video_annotations = self._extract_annotations(video_id)

    def _map_to_format_list(urlmap):
        # Turn an {itag: url} mapping into youtube-dl format dicts,
        # enriching them from the static self._formats table.
        for itag, video_real_url in urlmap.items():
                'url': video_real_url,
                'player_url': player_url,
            if itag in self._formats:
                dct.update(self._formats[itag])

    # Build the format list from whichever source the info exposes:
    # rtmp conn, url_encoded_fmt_stream_map/adaptive_fmts, or hlsvp.
    if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
        self.report_rtmp_download()
            'format_id': '_rtmp',
            'url': video_info['conn'][0],
            'player_url': player_url,
    elif len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1:
        encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
        if 'rtmpe%3Dyes' in encoded_url_map:
            raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
        for url_data_str in encoded_url_map.split(','):
            url_data = compat_parse_qs(url_data_str)
            if 'itag' not in url_data or 'url' not in url_data:
            format_id = url_data['itag'][0]
            url = url_data['url'][0]

            if 'sig' in url_data:
                url += '&signature=' + url_data['sig'][0]
            elif 's' in url_data:
                # Ciphered signature: locate the JS/SWF player and decrypt
                encrypted_sig = url_data['s'][0]
                ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'

                jsplayer_url_json = self._search_regex(
                    embed_webpage if age_gate else video_webpage,
                    'JS player URL (1)', default=None)
                if not jsplayer_url_json and not age_gate:
                    # We need the embed website after all
                    if embed_webpage is None:
                        embed_url = proto + '://www.youtube.com/embed/%s' % video_id
                        embed_webpage = self._download_webpage(
                            embed_url, video_id, 'Downloading embed webpage')
                    jsplayer_url_json = self._search_regex(
                        ASSETS_RE, embed_webpage, 'JS player URL')

                player_url = json.loads(jsplayer_url_json)
                if player_url is None:
                    player_url_json = self._search_regex(
                        r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
                        video_webpage, 'age gate player URL')
                    player_url = json.loads(player_url_json)

                if self._downloader.params.get('verbose'):
                    if player_url is None:
                        player_version = 'unknown'
                        player_desc = 'unknown'
                        if player_url.endswith('swf'):
                            player_version = self._search_regex(
                                r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
                                'flash player', fatal=False)
                            player_desc = 'flash player %s' % player_version
                            player_version = self._search_regex(
                                [r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js', r'(?:www|player)-([^/]+)/base\.js'],
                                'html5 player', fatal=False)
                            player_desc = 'html5 player %s' % player_version

                    parts_sizes = self._signature_cache_id(encrypted_sig)
                    self.to_screen('{%s} signature length %s, %s' %
                                   (format_id, parts_sizes, player_desc))

                signature = self._decrypt_signature(
                    encrypted_sig, video_id, player_url, age_gate)
                url += '&signature=' + signature
            if 'ratebypass' not in url:
                url += '&ratebypass=yes'

            # Some itags are not included in DASH manifest thus corresponding formats will
            # lack metadata (see https://github.com/rg3/youtube-dl/pull/5993).
            # Trying to extract metadata from url_encoded_fmt_stream_map entry.
            mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
            width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
                'format_id': format_id,
                'player_url': player_url,
                'filesize': int_or_none(url_data.get('clen', [None])[0]),
                'tbr': float_or_none(url_data.get('bitrate', [None])[0], 1000),
                'fps': int_or_none(url_data.get('fps', [None])[0]),
                'format_note': url_data.get('quality_label', [None])[0] or url_data.get('quality', [None])[0],
            # Derive container/codec information from the 'type' field
            type_ = url_data.get('type', [None])[0]
                type_split = type_.split(';')
                kind_ext = type_split[0].split('/')
                if len(kind_ext) == 2:
                    kind, ext = kind_ext
                    if kind in ('audio', 'video'):
                        for mobj in re.finditer(
                                r'(?P<key>[a-zA-Z_-]+)=(?P<quote>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):
                            if mobj.group('key') == 'codecs':
                                codecs = mobj.group('val')
                            codecs = codecs.split(',')
                            if len(codecs) == 2:
                                acodec, vcodec = codecs[0], codecs[1]
                                acodec, vcodec = (codecs[0], 'none') if kind == 'audio' else ('none', codecs[0])
            if format_id in self._formats:
                dct.update(self._formats[format_id])
    elif video_info.get('hlsvp'):
        manifest_url = video_info['hlsvp'][0]
        url_map = self._extract_from_m3u8(manifest_url, video_id)
        formats = _map_to_format_list(url_map)
        raise ExtractorError('no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')

    # Look for the DASH manifest
    if self._downloader.params.get('youtube_include_dash_manifest', True):
        dash_mpd_fatal = True
        for dash_manifest_url in dash_mpds:
                for df in self._parse_dash_manifest(
                        video_id, dash_manifest_url, player_url, age_gate, dash_mpd_fatal):
                    # Do not overwrite DASH format found in some previous DASH manifest
                    if df['format_id'] not in dash_formats:
                        dash_formats[df['format_id']] = df
                # Additional DASH manifests may end up in HTTP Error 403 therefore
                # allow them to fail without bug report message if we already have
                # some DASH manifest succeeded. This is temporary workaround to reduce
                # burst of bug reports until we figure out the reason and whether it
                # can be fixed at all.
                dash_mpd_fatal = False
            except (ExtractorError, KeyError) as e:
                self.report_warning(
                    'Skipping DASH manifest: %r' % e, video_id)
            # Remove the formats we found through non-DASH, they
            # contain less info and it can be wrong, because we use
            # fixed values (for example the resolution). See
            # https://github.com/rg3/youtube-dl/issues/5774 for an
            formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
            formats.extend(dash_formats.values())

    # Check for malformed aspect ratio
    stretched_m = re.search(
        r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
        ratio = float(stretched_m.group('w')) / float(stretched_m.group('h'))
            if f.get('vcodec') != 'none':
                f['stretched_ratio'] = ratio

    self._sort_formats(formats)

    # Assemble the final info dict
        'uploader': video_uploader,
        'uploader_id': video_uploader_id,
        'upload_date': upload_date,
        'title': video_title,
        'thumbnail': video_thumbnail,
        'description': video_description,
        'categories': video_categories,
        'subtitles': video_subtitles,
        'automatic_captions': automatic_captions,
        'duration': video_duration,
        'age_limit': 18 if age_gate else 0,
        'annotations': video_annotations,
        'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
        'view_count': view_count,
        'like_count': like_count,
        'dislike_count': dislike_count,
        'average_rating': float_or_none(video_info.get('avg_rating', [None])[0]),
        'start_time': start_time,
        'end_time': end_time,
class YoutubePlaylistIE(YoutubeBaseInfoExtractor, YoutubePlaylistBaseInfoExtractor):
    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r"""(?x)(?:
                        (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/videoseries)
                        \? (?:.*?&)*? (?:p|a|list)=
                        (?:PL|LL|EC|UU|FL|RD|UL)?[0-9A-Za-z-_]{10,}
                     # Top tracks, they can also include dots
                     ((?:PL|LL|EC|UU|FL|RD|UL)[0-9A-Za-z-_]{10,})
    _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
    # Matches one video entry (id, playlist index and optional title) on the
    # rendered playlist page.
    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&[^"]*?index=(?P<index>\d+)(?:[^>]+>(?P<title>[^<]+))?'
    IE_NAME = 'youtube:playlist'
        'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
            'title': 'ytdl test PL',
            'id': 'PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
        'playlist_count': 3,
        'url': 'https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
            'id': 'PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
            'title': 'YDL_Empty_List',
        'playlist_count': 0,
        'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
        'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
            'title': '29C3: Not my department',
            'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
        'playlist_count': 95,
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
        'playlist_mincount': 26,
        'note': 'Large playlist',
        'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
            'title': 'Uploads from Cauchemar',
            'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
        'playlist_mincount': 799,
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'playlist_count': 2,
        'url': 'http://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'note': 'Embedded SWF player',
        'url': 'http://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
        'playlist_count': 4,
            'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ',
        'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
        'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
            'title': 'Uploads from Interstellar Movie',
            'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
        'playlist_mincout': 21,

    def _real_initialize(self):

    def _extract_mix(self, playlist_id):
        """Extract an auto-generated 'mix' playlist ('RD' + video id)."""
        # The mixes are generated from a single video
        # the id of the playlist is just 'RD' + video_id
        url = 'https://youtube.com/watch?v=%s&list=%s' % (playlist_id[-11:], playlist_id)
        webpage = self._download_webpage(
            url, playlist_id, 'Downloading Youtube mix')
        search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage)
            search_title('playlist-title') or
            search_title('title long-title') or
            search_title('title'))
        title = clean_html(title_span)
        ids = orderedSet(re.findall(
            r'''(?xs)data-video-username=".*?".*?
               href="/watch\?v=([0-9A-Za-z_-]{11})&[^"]*?list=%s''' % re.escape(playlist_id),
        url_results = self._ids_to_results(ids)

        return self.playlist_result(url_results, playlist_id, title)

    def _extract_playlist(self, playlist_id):
        """Download the playlist page and return its entries, surfacing any
        alert message YouTube embeds into the page."""
        url = self._TEMPLATE_URL % playlist_id
        page = self._download_webpage(url, playlist_id)

        for match in re.findall(r'<div class="yt-alert-message">([^<]+)</div>', page):
            match = match.strip()
            # Check if the playlist exists or is private
            if re.match(r'[^<]*(The|This) playlist (does not exist|is private)[^<]*', match):
                raise ExtractorError(
                    'The playlist doesn\'t exist or is private, use --username or '
                    '--netrc to access it.',
            elif re.match(r'[^<]*Invalid parameters[^<]*', match):
                raise ExtractorError(
                    'Invalid parameters. Maybe URL is incorrect.',
            elif re.match(r'[^<]*Choose your language[^<]*', match):
                self.report_warning('Youtube gives an alert message: ' + match)

        playlist_title = self._html_search_regex(
            r'(?s)<h1 class="pl-header-title[^"]*">\s*(.*?)\s*</h1>',

        return self.playlist_result(self._entries(page, playlist_id), playlist_id, playlist_title)

    def _real_extract(self, url):
        # Extract playlist id
        mobj = re.match(self._VALID_URL, url)
            raise ExtractorError('Invalid URL: %s' % url)
        playlist_id = mobj.group(1) or mobj.group(2)

        # Check if it's a video-specific URL
        query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
        if 'v' in query_dict:
            video_id = query_dict['v'][0]
            if self._downloader.params.get('noplaylist'):
                self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
                return self.url_result(video_id, 'Youtube', video_id=video_id)
                self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))

        if playlist_id.startswith('RD') or playlist_id.startswith('UL'):
            # Mixes require a custom extraction process
            return self._extract_mix(playlist_id)

        return self._extract_playlist(playlist_id)
class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
    IE_DESC = 'YouTube.com channels'
    _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/(?P<id>[0-9A-Za-z_-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos'
    _VIDEO_RE = r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?'
    IE_NAME = 'youtube:channel'
        'note': 'paginated channel',
        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
        'playlist_mincount': 91,
            'id': 'UUKfVa3S1e4PHvxWcwyMMg8w',
            'title': 'Uploads from lex will',
        'note': 'Age restricted channel',
        # from https://www.youtube.com/user/DeusExOfficial
        'url': 'https://www.youtube.com/channel/UCs0ifCMCm1icqRbqhUINa0w',
        'playlist_mincount': 64,
            'id': 'UUs0ifCMCm1icqRbqhUINa0w',
            'title': 'Uploads from Deus Ex',

    def _real_extract(self, url):
        channel_id = self._match_id(url)

        url = self._TEMPLATE_URL % channel_id

        # Channel by page listing is restricted to 35 pages of 30 items, i.e. 1050 videos total (see #5778)
        # Workaround by extracting as a playlist if managed to obtain channel playlist URL
        # otherwise fallback on channel by page extraction
        channel_page = self._download_webpage(
            url + '?view=57', channel_id,
            'Downloading channel page', fatal=False)
        if channel_page is False:
            channel_playlist_id = False
            # Prefer the <meta> channelId; fall back to a data-* attribute
            channel_playlist_id = self._html_search_meta(
                'channelId', channel_page, 'channel id', default=None)
            if not channel_playlist_id:
                channel_playlist_id = self._search_regex(
                    r'data-(?:channel-external-|yt)id="([^"]+)"',
                    channel_page, 'channel id', default=None)
        if channel_playlist_id and channel_playlist_id.startswith('UC'):
            # A 'UC...' channel id maps onto the 'UU...' uploads playlist
            playlist_id = 'UU' + channel_playlist_id[2:]
            return self.url_result(
                compat_urlparse.urljoin(url, '/playlist?list=%s' % playlist_id), 'YoutubePlaylist')

        channel_page = self._download_webpage(url, channel_id, 'Downloading page #1')
        autogenerated = re.search(r'''(?x)
                channel-header-autogenerated-label|
                yt-channel-title-autogenerated
            )[^"]*"''', channel_page) is not None

            # The videos are contained in a single page
            # the ajax pages can't be used, they are empty
                video_id, 'Youtube', video_id=video_id,
                video_title=video_title)
                for video_id, video_title in self.extract_videos_from_page(channel_page)]
            return self.playlist_result(entries, channel_id)

        return self.playlist_result(self._entries(channel_page, channel_id), channel_id)
class YoutubeUserIE(YoutubeChannelIE):
    IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/user/%s/videos'
    IE_NAME = 'youtube:user'
        'url': 'https://www.youtube.com/user/TheLinuxFoundation',
        'playlist_mincount': 320,
            'title': 'TheLinuxFoundation',
        'url': 'ytuser:phihag',
        'only_matching': True,

    def suitable(cls, url):
        # Don't return True if the url can be extracted with other youtube
        # extractors: the regex is too permissive and it would match.
        other_ies = iter(klass for (name, klass) in globals().items() if name.endswith('IE') and klass is not cls)
        if any(ie.suitable(url) for ie in other_ies):
        return super(YoutubeUserIE, cls).suitable(url)
class YoutubeSearchIE(SearchInfoExtractor, YoutubePlaylistIE):
    IE_DESC = 'YouTube.com searches'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    _EXTRA_QUERY_ARGS = {}

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""

        for pagenum in itertools.count(1):
                'search_query': query.encode('utf-8'),
            url_query.update(self._EXTRA_QUERY_ARGS)
            result_url = 'https://www.youtube.com/results?' + compat_urllib_parse.urlencode(url_query)
            data = self._download_json(
                result_url, video_id='query "%s"' % query,
                note='Downloading page %s' % pagenum,
                errnote='Unable to download API page')
            html_content = data[1]['body']['content']

            # An empty result set carries a 'search-message' element
            if 'class="search-message' in html_content:
                raise ExtractorError(
                    '[youtube] No video results', expected=True)

            new_videos = self._ids_to_results(orderedSet(re.findall(
                r'href="/watch\?v=(.{11})', html_content)))
            videos += new_videos
            if not new_videos or len(videos) > limit:

        return self.playlist_result(videos, query)
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Search extractor variant that orders results newest-first."""
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    IE_DESC = 'YouTube.com searches, newest videos first'
    _SEARCH_KEY = 'ytsearchdate'
    # Extra query parameter forwarded to the results page by the base class
    _EXTRA_QUERY_ARGS = {'search_sort': 'video_date_uploaded'}
class YoutubeSearchURLIE(InfoExtractor):
    IE_DESC = 'YouTube.com search URLs'
    IE_NAME = 'youtube:search_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?search_query=(?P<query>[^&]+)(?:[&]|$)'
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
            'title': 'youtube-dl test video',

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        query = compat_urllib_parse_unquote_plus(mobj.group('query'))
        webpage = self._download_webpage(url, query)
        # The result list is rendered inside an <ol class="item-section">
        result_code = self._search_regex(
            r'(?s)<ol[^>]+class="item-section"(.*?)</ol>', webpage, 'result HTML')

        part_codes = re.findall(
            r'(?s)<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*>(.*?)</h3>', result_code)
        for part_code in part_codes:
            part_title = self._html_search_regex(
                [r'(?s)title="([^"]+)"', r'>([^<]+)</a>'], part_code, 'item title', fatal=False)
            part_url_snippet = self._html_search_regex(
                r'(?s)href="([^"]+)"', part_code, 'item URL')
            part_url = compat_urlparse.urljoin(
                'https://www.youtube.com/', part_url_snippet)
                'title': part_title,

            '_type': 'playlist',
class YoutubeShowIE(InfoExtractor):
    IE_DESC = 'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://www\.youtube\.com/show/(?P<id>[^?#]*)'
    IE_NAME = 'youtube:show'
        'url': 'https://www.youtube.com/show/airdisasters',
        'playlist_mincount': 5,
            'id': 'airdisasters',
            'title': 'Air Disasters',

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        playlist_id = mobj.group('id')
        webpage = self._download_webpage(
            'https://www.youtube.com/show/%s/playlists' % playlist_id, playlist_id, 'Downloading show webpage')
        # There's one playlist for each season of the show
        m_seasons = list(re.finditer(r'href="(/playlist\?list=.*?)"', webpage))
        self.to_screen('%s: Found %s seasons' % (playlist_id, len(m_seasons)))
            'https://www.youtube.com' + season.group(1), 'YoutubePlaylist')
            for season in m_seasons
        title = self._og_search_title(webpage, fatal=False)

            '_type': 'playlist',
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    Base class for feed extractors
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
    _LOGIN_REQUIRED = True

        return 'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):

    def _real_extract(self, url):
        page = self._download_webpage(
            'https://www.youtube.com/feed/%s' % self._FEED_NAME, self._PLAYLIST_TITLE)

        # The extraction process is the same as for playlists, but the regex
        # for the video ids doesn't contain an index
        more_widget_html = content_html = page
        for page_num in itertools.count(1):
            matches = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)

            # 'recommended' feed has infinite 'load more' and each new portion spins
            # the same videos in (sometimes) slightly different order, so we'll check
            # for uniqueness and break when a portion has no new videos
            new_ids = filter(lambda video_id: video_id not in ids, orderedSet(matches))

            mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)

            # Fetch the next 'load more' portion as JSON
            more = self._download_json(
                'https://youtube.com/%s' % mobj.group('more'), self._PLAYLIST_TITLE,
                'Downloading page #%s' % page_num,
                transform_source=uppercase_escape)
            content_html = more['content_html']
            more_widget_html = more['load_more_widget_html']

        return self.playlist_result(
            self._ids_to_results(ids), playlist_title=self._PLAYLIST_TITLE)
class YoutubeWatchLaterIE(YoutubePlaylistIE):
    """Extractor for the logged-in user's special 'Watch Later' playlist."""
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/(?:feed/watch_later|playlist\?list=WL)|:ytwatchlater'

    # The inherited playlist tests target ordinary public playlists; none of
    # them apply to the per-account 'WL' list, so suppress them here.
    _TESTS = []

    def _real_extract(self, url):
        # Every account's watch-later queue lives under the reserved playlist
        # id 'WL'; delegate all real work to the parent playlist machinery.
        return self._extract_playlist('WL')
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Resolve the user's favourites page to its backing playlist."""
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
    # Favourites are tied to an account, so credentials are mandatory.
    _LOGIN_REQUIRED = True

    def _real_extract(self, url):
        # The favourites page embeds an ordinary playlist id; scrape it out
        # and hand the remaining work over to the playlist extractor.
        page = self._download_webpage(
            'https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        favourites_id = self._search_regex(
            r'list=(.+?)["&]', page, 'favourites playlist id')
        return self.url_result(favourites_id, 'YoutubePlaylist')
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    # Personalised 'recommended' feed; all extraction logic lives in
    # YoutubeFeedsInfoExtractor — this subclass only supplies the feed name
    # and the title used for the resulting playlist.
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?'
    _FEED_NAME = 'recommended'  # path component of the feed URL
    _PLAYLIST_TITLE = 'Youtube Recommended videos'  # title of the returned playlist
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    # Subscriptions feed; all extraction logic lives in
    # YoutubeFeedsInfoExtractor — this subclass only supplies the feed name
    # and the title used for the resulting playlist.
    IE_DESC = 'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
    _FEED_NAME = 'subscriptions'  # path component of the feed URL
    _PLAYLIST_TITLE = 'Youtube Subscriptions'  # title of the returned playlist
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    # Watch-history feed; all extraction logic lives in
    # YoutubeFeedsInfoExtractor — this subclass only supplies the feed name
    # and the title used for the resulting playlist.
    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
    # Raw string like every sibling extractor: in a plain string '\.' is an
    # invalid escape that only matches by accident (Python currently passes
    # unknown escapes through, but that behavior is deprecated).
    _VALID_URL = r'https?://www\.youtube\.com/feed/history|:ythistory'
    _FEED_NAME = 'history'  # path component of the feed URL
    _PLAYLIST_TITLE = 'Youtube History'  # title of the returned playlist
1991 class YoutubeTruncatedURLIE(InfoExtractor):
1992 IE_NAME = 'youtube:truncated_url'
1993 IE_DESC = False # Do not list
1994 _VALID_URL = r'''(?x)
1996 (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
1999 annotation_id=annotation_[^&]+|
2005 attribution_link\?a=[^&]+
2011 'url': 'http://www.youtube.com/watch?annotation_id=annotation_3951667041',
2012 'only_matching': True,
2014 'url': 'http://www.youtube.com/watch?',
2015 'only_matching': True,
2017 'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
2018 'only_matching': True,
2020 'url': 'https://www.youtube.com/watch?feature=foo',
2021 'only_matching': True,
2023 'url': 'https://www.youtube.com/watch?hl=en-GB',
2024 'only_matching': True,
2026 'url': 'https://www.youtube.com/watch?t=2372',
2027 'only_matching': True,
2030 def _real_extract(self, url):
2031 raise ExtractorError(
2032 'Did you forget to quote the URL? Remember that & is a meta '
2033 'character in most shells, so you want to put the URL in quotes, '
2035 '"http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
2036 ' or simply youtube-dl BaW_jenozKc .',
2040 class YoutubeTruncatedIDIE(InfoExtractor):
2041 IE_NAME = 'youtube:truncated_id'
2042 IE_DESC = False # Do not list
2043 _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'
2046 'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
2047 'only_matching': True,
2050 def _real_extract(self, url):
2051 video_id = self._match_id(url)
2052 raise ExtractorError(
2053 'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url),