3 from __future__ import unicode_literals
13 from .common import InfoExtractor, SearchInfoExtractor
14 from ..jsinterp import JSInterpreter
15 from ..swfinterp import SWFInterpreter
16 from ..compat import (
20 compat_urllib_parse_unquote,
21 compat_urllib_parse_unquote_plus,
22 compat_urllib_parse_urlparse,
23 compat_urllib_request,
32 get_element_by_attribute,
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    # Google account endpoints used by the login flow below.
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'
    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    def _set_language(self):
        # Force a predictable (English) page layout via the PREF cookie.
        # NOTE(review): the self._set_cookie( call line appears elided in this excerpt.
        '.youtube.com', 'PREF', 'f1=50000000&hl=en',
        # YouTube sets the expire time to about two months
        expire_time=time.time() + 2 * 30 * 24 * 3600)

    def _ids_to_results(self, ids):
        # Wrap each plain video id into a url_result dict handled by the Youtube IE.
        self.url_result(vid_id, 'Youtube', video_id=vid_id)

        Attempt to log in to YouTube.
        True is returned if successful or skipped.
        False is returned if login failed.

        If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
        (username, password) = self._get_login_info()
        # No authentication to be performed
        if self._LOGIN_REQUIRED:
            raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)

        login_page = self._download_webpage(
            self._LOGIN_URL, None,
            note='Downloading login page',
            errnote='unable to fetch login page', fatal=False)
        if login_page is False:

        # GALX is a hidden anti-forgery token the login form must echo back.
        galx = self._search_regex(r'(?s)<input.+?name="GALX".+?value="(.+?)"',
                                  login_page, 'Login GALX parameter')

        # Hidden/static form fields expected by Google's ServiceLogin endpoint.
            'continue': 'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
            'PersistentCookie': 'yes',
            'bgresponse': 'js_disabled',
            'checkConnection': '',
            'checkedDomains': 'youtube',
            'service': 'youtube',

        login_data = compat_urllib_parse.urlencode(encode_dict(login_form_strs)).encode('ascii')

        req = compat_urllib_request.Request(self._LOGIN_URL, login_data)
        login_results = self._download_webpage(
            note='Logging in', errnote='unable to log in', fatal=False)
        if login_results is False:

        if re.search(r'id="errormsg_0_Passwd"', login_results) is not None:
            raise ExtractorError('Please use your account password and a two-factor code instead of an application-specific password.', expected=True)

        # Two-factor challenge handling.
        # TODO add SMS and phone call support - these require making a request and then prompting the user
        if re.search(r'(?i)<form[^>]* id="challenge"', login_results) is not None:
            tfa_code = self._get_tfa_info('2-step verification code')

                self._downloader.report_warning(
                    'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
                    '(Note that only TOTP (Google Authenticator App) codes work at this time.)')

            # Users sometimes paste the code with its "G-" prefix; strip it.
            tfa_code = remove_start(tfa_code, 'G-')

            tfa_form_strs = self._form_hidden_inputs('challenge', login_results)

            tfa_form_strs.update({

            tfa_data = compat_urllib_parse.urlencode(encode_dict(tfa_form_strs)).encode('ascii')

            tfa_req = compat_urllib_request.Request(self._TWOFACTOR_URL, tfa_data)
            tfa_results = self._download_webpage(
                note='Submitting TFA code', errnote='unable to submit tfa', fatal=False)

            if tfa_results is False:

            # Failure modes are detected heuristically from the returned HTML.
            if re.search(r'(?i)<form[^>]* id="challenge"', tfa_results) is not None:
                self._downloader.report_warning('Two-factor code expired or invalid. Please try again, or use a one-use backup code instead.')
            if re.search(r'(?i)<form[^>]* id="gaia_loginform"', tfa_results) is not None:
                self._downloader.report_warning('unable to log in - did the page structure change?')
            if re.search(r'smsauth-interstitial-reviewsettings', tfa_results) is not None:
                self._downloader.report_warning('Your Google account has a security notice. Please log in on your web browser, resolve the notice, and try again.')

        # Still seeing the login form means the credentials were rejected.
        if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
            self._downloader.report_warning('unable to log in: bad username or password')

    def _real_initialize(self):
        # Runs once before extraction; aborts early when login fails.
        if self._downloader is None:
        if not self._login():
class YoutubePlaylistBaseInfoExtractor(InfoExtractor):
    # Extract the video ids from the playlist pages
    def _entries(self, page, playlist_id):
        # Iterate over the playlist, following the AJAX "Load more" widget
        # until no further page is advertised.
        more_widget_html = content_html = page
        for page_num in itertools.count(1):
            for video_id, video_title in self.extract_videos_from_page(content_html):
                yield self.url_result(
                    video_id, 'Youtube', video_id=video_id,
                    video_title=video_title)

            # The "Load more" button carries the relative URL of the next page.
            mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
            more = self._download_json(
                'https://youtube.com/%s' % mobj.group('more'), playlist_id,
                'Downloading page #%s' % page_num,
                transform_source=uppercase_escape)
            content_html = more['content_html']
            if not content_html.strip():
                # Some webpages show a "Load more" button but they don't
            more_widget_html = more['load_more_widget_html']

    def extract_videos_from_page(self, page):
        # Collect (id, title) pairs, de-duplicating ids while preferring the
        # first non-empty title seen for each id.
        for mobj in re.finditer(self._VIDEO_RE, page):
            # The link with index 0 is not the first video of the playlist (not sure if this is still the case)
            if 'index' in mobj.groupdict() and mobj.group('id') == '0':
            video_id = mobj.group('id')
            video_title = unescapeHTML(mobj.group('title'))
                video_title = video_title.strip()
                # Already seen: backfill a missing title if we now have one.
                idx = ids_in_page.index(video_id)
                if video_title and not titles_in_page[idx]:
                    titles_in_page[idx] = video_title
                ids_in_page.append(video_id)
                titles_in_page.append(video_title)
        return zip(ids_in_page, titles_in_page)
class YoutubeIE(YoutubeBaseInfoExtractor):
    IE_DESC = 'YouTube.com'
    # Verbose (?x) pattern: whitespace and the trailing # comments inside the
    # raw string are ignored by the regex engine.
    _VALID_URL = r"""(?x)^
                     (?:https?://|//)                                    # http(s):// or protocol-independent URL
                     (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|
                        (?:www\.)?deturl\.com/www\.youtube\.com/|
                        (?:www\.)?pwnyoutube\.com/|
                        (?:www\.)?yourepeat\.com/|
                        tube\.majestyc\.net/|
                        youtube\.googleapis\.com/)                        # the various hostnames, with wildcard subdomains
                     (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
                     (?:                                                  # the various things that can precede the ID:
                         (?:(?:v|embed|e)/(?!videoseries))                # v/ or embed/ or e/
                         |(?:                                             # or the v= param in all its forms
                             (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)?  # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
                             (?:\?|\#!?)                                  # the params delimiter ? or # or #!
                             (?:.*?&)??                                   # any other preceding param (like /?s=tuff&v=xxxx)
                     youtu\.be|                                           # just youtu.be/xxxx
                     vid\.plus                                            # or vid.plus/xxxx
                     |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
                     )?                                                   # all until now is optional -> you can pass the naked ID
                     ([0-9A-Za-z_-]{11})                                  # here is it! the YouTube video ID
                     (?!.*?&list=)                                        # combined list/video URLs are handled by the playlist IE
                     (?(1).+)?                                            # if we found the ID, everything can follow
    # Used to strip a redirect wrapper (e.g. age verification) from URLs.
    _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
    # Static itag -> format metadata table used to enrich extracted formats.
    # NOTE(review): the `_formats = {` opener is not visible in this excerpt.
        '5': {'ext': 'flv', 'width': 400, 'height': 240},
        '6': {'ext': 'flv', 'width': 450, 'height': 270},
        '13': {'ext': '3gp'},
        '17': {'ext': '3gp', 'width': 176, 'height': 144},
        '18': {'ext': 'mp4', 'width': 640, 'height': 360},
        '22': {'ext': 'mp4', 'width': 1280, 'height': 720},
        '34': {'ext': 'flv', 'width': 640, 'height': 360},
        '35': {'ext': 'flv', 'width': 854, 'height': 480},
        '36': {'ext': '3gp', 'width': 320, 'height': 240},
        '37': {'ext': 'mp4', 'width': 1920, 'height': 1080},
        '38': {'ext': 'mp4', 'width': 4096, 'height': 3072},
        '43': {'ext': 'webm', 'width': 640, 'height': 360},
        '44': {'ext': 'webm', 'width': 854, 'height': 480},
        '45': {'ext': 'webm', 'width': 1280, 'height': 720},
        '46': {'ext': 'webm', 'width': 1920, 'height': 1080},
        '59': {'ext': 'mp4', 'width': 854, 'height': 480},
        '78': {'ext': 'mp4', 'width': 854, 'height': 480},

        # 3D videos (negative preference: picked only as a last resort)
        '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'preference': -20},
        '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'preference': -20},
        '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'preference': -20},
        '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'preference': -20},
        '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'preference': -20},
        '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'preference': -20},
        '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'preference': -20},

        # Apple HTTP Live Streaming
        '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'preference': -10},
        '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'preference': -10},
        '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'preference': -10},
        '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'preference': -10},
        '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'preference': -10},
        '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'preference': -10},
        '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'preference': -10},

        # DASH mp4 video (video-only: acodec none)
        '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '138': {'ext': 'mp4', 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, # Height can vary (https://github.com/rg3/youtube-dl/issues/4559)
        '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'h264'},
        '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'h264'},
        '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'vcodec': 'h264'},

        # DASH mp4 audio (audio-only: vcodec none)
        '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 48, 'preference': -50, 'container': 'm4a_dash'},
        '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 128, 'preference': -50, 'container': 'm4a_dash'},
        '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 256, 'preference': -50, 'container': 'm4a_dash'},

        # DASH webm video
        '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
        '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
        '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
        '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
        '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
        '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
        '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'container': 'webm', 'vcodec': 'vp9'},
        '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'vp9'},
        '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'vp9'},
        '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'vp9'},
        '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'vcodec': 'vp9'},
        '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'vp9'},

        # DASH webm audio
        '171': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 128, 'preference': -50},
        '172': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 256, 'preference': -50},

        # Dash webm audio with opus inside
        '249': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50, 'preference': -50},
        '250': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70, 'preference': -50},
        '251': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160, 'preference': -50},

        # RTMP streams carry only a protocol marker, no static metadata.
        '_rtmp': {'protocol': 'rtmp'},
    # Test fixtures consumed by the youtube-dl test harness.
    # NOTE(review): the `_TESTS = [` opener and many structural brace lines
    # are not visible in this excerpt; entries below are fragments.
            'url': 'http://www.youtube.com/watch?v=BaW_jenozKcj&t=1s&end=9',
                'title': 'youtube-dl test video "\'/\\ä↭𝕐',
                'uploader': 'Philipp Hagemeister',
                'uploader_id': 'phihag',
                'upload_date': '20121002',
                'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
                'categories': ['Science & Technology'],
                'tags': ['youtube-dl'],
                'dislike_count': int,
            'url': 'http://www.youtube.com/watch?v=UxxajLWwzqY',
            'note': 'Test generic use_cipher_signature video (#897)',
                'upload_date': '20120506',
                'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
                'description': 'md5:782e8651347686cba06e58f71ab51773',
                'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
                         'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
                         'iconic ep', 'iconic', 'love', 'it'],
                'uploader': 'Icona Pop',
                'uploader_id': 'IconaPop',
            'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ',
            'note': 'Test VEVO video with age protection (#956)',
                'upload_date': '20130703',
                'title': 'Justin Timberlake - Tunnel Vision (Explicit)',
                'description': 'md5:64249768eec3bc4276236606ea996373',
                'uploader': 'justintimberlakeVEVO',
                'uploader_id': 'justintimberlakeVEVO',
            'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
            'note': 'Embed-only video (#1746)',
                'upload_date': '20120608',
                'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
                'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
                'uploader': 'SET India',
                'uploader_id': 'setindia'
            'url': 'http://www.youtube.com/watch?v=BaW_jenozKcj&v=UxxajLWwzqY',
            'note': 'Use the first video ID in the URL',
                'title': 'youtube-dl test video "\'/\\ä↭𝕐',
                'uploader': 'Philipp Hagemeister',
                'uploader_id': 'phihag',
                'upload_date': '20121002',
                'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
                'categories': ['Science & Technology'],
                'tags': ['youtube-dl'],
                'dislike_count': int,
                'skip_download': True,
            'url': 'http://www.youtube.com/watch?v=a9LDPn-MO4I',
            'note': '256k DASH audio (format 141) via DASH manifest',
                'upload_date': '20121002',
                'uploader_id': '8KVIDEO',
                'uploader': '8KVIDEO',
                'title': 'UHDTV TEST 8K VIDEO.mp4'
                'youtube_include_dash_manifest': True,
            # DASH manifest with encrypted signature
            'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
                'title': 'Afrojack, Spree Wilson - The Spark ft. Spree Wilson',
                'description': 'md5:12e7067fa6735a77bdcbb58cb1187d2d',
                'uploader': 'AfrojackVEVO',
                'uploader_id': 'AfrojackVEVO',
                'upload_date': '20131011',
                'youtube_include_dash_manifest': True,
            # JS player signature function name containing $
            'url': 'https://www.youtube.com/watch?v=nfWlot6h_JM',
                'title': 'Taylor Swift - Shake It Off',
                'description': 'md5:95f66187cd7c8b2c13eb78e1223b63c3',
                'uploader': 'TaylorSwiftVEVO',
                'uploader_id': 'TaylorSwiftVEVO',
                'upload_date': '20140818',
                'youtube_include_dash_manifest': True,
            'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
                'upload_date': '20100909',
                'uploader': 'The Amazing Atheist',
                'uploader_id': 'TheAmazingAtheist',
                'title': 'Burning Everyone\'s Koran',
                'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
            # Normal age-gate video (No vevo, embed allowed)
            'url': 'http://youtube.com/watch?v=HtVdAasjOgU',
                'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
                'description': 're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
                'uploader': 'The Witcher',
                'uploader_id': 'WitcherGame',
                'upload_date': '20140605',
            # Age-gate video with encrypted signature
            'url': 'http://www.youtube.com/watch?v=6kLq3WMV1nU',
                'title': 'Dedication To My Ex (Miss That) (Lyric Video)',
                'description': 'md5:33765bb339e1b47e7e72b5490139bb41',
                'uploader': 'LloydVEVO',
                'uploader_id': 'LloydVEVO',
                'upload_date': '20110629',
            # video_info is None (https://github.com/rg3/youtube-dl/issues/4421)
            'url': '__2ABJjxzNo',
                'upload_date': '20100430',
                'uploader_id': 'deadmau5',
                'description': 'md5:12c56784b8032162bb936a5f76d55360',
                'uploader': 'deadmau5',
                'title': 'Deadmau5 - Some Chords (HD)',
            'expected_warnings': [
                'DASH manifest missing',
            # Olympics (https://github.com/rg3/youtube-dl/issues/4431)
            'url': 'lqQg6PlCWgI',
                'upload_date': '20120724',
                'uploader_id': 'olympic',
                'description': 'HO09 - Women -  GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
                'uploader': 'Olympics',
                'title': 'Hockey - Women -  GER-AUS - London 2012 Olympic Games',
                'skip_download': 'requires avconv',
            'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
                'stretched_ratio': 16 / 9.,
                'upload_date': '20110310',
                'uploader_id': 'AllenMeow',
                'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
                'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
            # url_encoded_fmt_stream_map is empty string
            'url': 'qEJwOuvDf7I',
                'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
                'upload_date': '20150404',
                'uploader_id': 'spbelect',
                'uploader': 'Наблюдатели Петербурга',
                'skip_download': 'requires avconv',
            # Extraction from multiple DASH manifests (https://github.com/rg3/youtube-dl/pull/6097)
            'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
                'title': 'md5:7b81415841e02ecd4313668cde88737a',
                'description': 'md5:116377fd2963b81ec4ce64b542173306',
                'upload_date': '20150625',
                'uploader_id': 'dorappi2000',
                'uploader': 'dorappi2000',
                'formats': 'mincount:33',
            # DASH manifest with segment_list
            'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
            'md5': '8ce563a1d667b599d21064e982ab9e31',
                'upload_date': '20150501',  # According to '<meta itemprop="datePublished"', but in other places it's 20150510
                'uploader': 'Airtek',
                'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
                'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
                'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
                'youtube_include_dash_manifest': True,
                'format': '135',  # bestvideo
            # Multifeed videos (multiple cameras), URL is for Main Camera
            'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
                'title': 'teamPGP: Rocket League Noob Stream',
                'description': 'md5:dc7872fb300e143831327f1bae3af010',
                'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
                'description': 'md5:dc7872fb300e143831327f1bae3af010',
                'upload_date': '20150721',
                'uploader': 'Beer Games Beer',
                'uploader_id': 'beergamesbeer',
                'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
                'description': 'md5:dc7872fb300e143831327f1bae3af010',
                'upload_date': '20150721',
                'uploader': 'Beer Games Beer',
                'uploader_id': 'beergamesbeer',
                'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
                'description': 'md5:dc7872fb300e143831327f1bae3af010',
                'upload_date': '20150721',
                'uploader': 'Beer Games Beer',
                'uploader_id': 'beergamesbeer',
                'title': 'teamPGP: Rocket League Noob Stream (zim)',
                'description': 'md5:dc7872fb300e143831327f1bae3af010',
                'upload_date': '20150721',
                'uploader': 'Beer Games Beer',
                'uploader_id': 'beergamesbeer',
                'skip_download': True,
            'url': 'http://vid.plus/FlRa-iH7PGw',
            'only_matching': True,
            # Title with JS-like syntax "};"
            'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
                'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
                'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
                'upload_date': '20151119',
                'uploader_id': 'IronSoulElf',
                'uploader': 'IronSoulElf',
                'skip_download': True,
    def __init__(self, *args, **kwargs):
        """Initialize the extractor and its signature-function cache."""
        super(YoutubeIE, self).__init__(*args, **kwargs)
        # Maps (player_url, signature cache id) -> decryption callable;
        # populated lazily by _decrypt_signature.
        self._player_cache = {}
700 def report_video_info_webpage_download(self, video_id):
701 """Report attempt to download video info webpage."""
702 self.to_screen('%s: Downloading video info webpage' % video_id)
704 def report_information_extraction(self, video_id):
705 """Report attempt to extract video information."""
706 self.to_screen('%s: Extracting video information' % video_id)
708 def report_unavailable_format(self, video_id, format):
709 """Report extracted video URL."""
710 self.to_screen('%s: Format %s not available' % (video_id, format))
712 def report_rtmp_download(self):
713 """Indicate the download will use the RTMP protocol."""
714 self.to_screen('RTMP download detected')
716 def _signature_cache_id(self, example_sig):
717 """ Return a string representation of a signature """
718 return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
    def _extract_signature_function(self, video_id, player_url, example_sig):
        # Identify the player flavour (js/swf) and id from its URL.
            r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|/base)?\.(?P<ext>[a-z]+)$',
            raise ExtractorError('Cannot identify player %r' % player_url)
        player_type = id_m.group('ext')
        player_id = id_m.group('id')

        # Read from filesystem cache
        func_id = '%s_%s_%s' % (
            player_type, player_id, self._signature_cache_id(example_sig))
        assert os.path.basename(func_id) == func_id

        cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
        if cache_spec is not None:
            # Cached spec is a list of source indices: signature decryption
            # reduces to a character permutation/selection.
            return lambda s: ''.join(s[i] for i in cache_spec)

            'Downloading player %s' % player_url
            if self._downloader.params.get('verbose') else
            'Downloading %s player %s' % (player_type, player_id)
        if player_type == 'js':
            code = self._download_webpage(
                player_url, video_id,
                errnote='Download of %s failed' % player_url)
            res = self._parse_sig_js(code)
        elif player_type == 'swf':
            urlh = self._request_webpage(
                player_url, video_id,
                errnote='Download of %s failed' % player_url)
            res = self._parse_sig_swf(code)
            assert False, 'Invalid player type %r' % player_type

        # Derive the index-permutation spec by running the extracted function
        # over a known test string, then persist it for future runs.
        test_string = ''.join(map(compat_chr, range(len(example_sig))))
        cache_res = res(test_string)
        cache_spec = [ord(c) for c in cache_res]
        self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
    def _print_sig_code(self, func, example_sig):
        # Render the extracted signature function as equivalent Python slice
        # expressions, for use with --youtube-print-sig-code.
        def gen_sig_code(idxs):
            def _genslice(start, end, step):
                # Build a minimal s[start:end:step] textual slice.
                starts = '' if start == 0 else str(start)
                ends = (':%d' % (end + step)) if end + step >= 0 else ':'
                steps = '' if step == 1 else (':%d' % step)
                return 's[%s%s%s]' % (starts, ends, steps)

            # Quelch pyflakes warnings - start will be set when step is set
            start = '(Never used)'
            for i, prev in zip(idxs[1:], idxs[:-1]):
                    yield _genslice(start, prev, step)
                if i - prev in [-1, 1]:
                yield _genslice(start, i, step)

        # Run the function on a known string to recover its index mapping.
        test_string = ''.join(map(compat_chr, range(len(example_sig))))
        cache_res = func(test_string)
        cache_spec = [ord(c) for c in cache_res]
        expr_code = ' + '.join(gen_sig_code(cache_spec))
        signature_id_tuple = '(%s)' % (
            ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
        code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
                '    return %s\n') % (signature_id_tuple, expr_code)
        self.to_screen('Extracted signature function:\n' + code)
805 def _parse_sig_js(self, jscode):
806 funcname = self._search_regex(
807 r'\.sig\|\|([a-zA-Z0-9$]+)\(', jscode,
808 'Initial JS player signature function name')
810 jsi = JSInterpreter(jscode)
811 initial_function = jsi.extract_function(funcname)
812 return lambda s: initial_function([s])
814 def _parse_sig_swf(self, file_contents):
815 swfi = SWFInterpreter(file_contents)
816 TARGET_CLASSNAME = 'SignatureDecipher'
817 searched_class = swfi.extract_class(TARGET_CLASSNAME)
818 initial_function = swfi.extract_function(searched_class, 'decipher')
819 return lambda s: initial_function([s])
    def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
        """Turn the encrypted s field into a working signature"""

        if player_url is None:
            raise ExtractorError('Cannot decrypt signature without player_url')

        # Player URLs may be protocol-relative.
        if player_url.startswith('//'):
            player_url = 'https:' + player_url
            # Cache key pairs the player with the signature's length pattern.
            player_id = (player_url, self._signature_cache_id(s))
            if player_id not in self._player_cache:
                func = self._extract_signature_function(
                    video_id, player_url, s
                self._player_cache[player_id] = func
            func = self._player_cache[player_id]
            if self._downloader.params.get('youtube_print_sig_code'):
                self._print_sig_code(func, s)
        except Exception as e:
            # Surface the full traceback: failures here usually mean the
            # player code changed and the extractor needs updating.
            tb = traceback.format_exc()
            raise ExtractorError(
                'Signature extraction failed: ' + tb, cause=e)
    def _get_subtitles(self, video_id, webpage):
        # List available closed-caption tracks via the timedtext endpoint.
            subs_doc = self._download_xml(
                'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
                video_id, note=False)
        except ExtractorError as err:
            # Best effort: warn instead of failing the whole extraction.
            self._downloader.report_warning('unable to download video subtitles: %s' % compat_str(err))

        for track in subs_doc.findall('track'):
            lang = track.attrib['lang_code']
            if lang in sub_lang_list:
            # Offer each track in every supported subtitle format.
            for ext in ['sbv', 'vtt', 'srt']:
                params = compat_urllib_parse.urlencode({
                    'name': track.attrib['name'].encode('utf-8'),
                    'url': 'https://www.youtube.com/api/timedtext?' + params,
            sub_lang_list[lang] = sub_formats
        if not sub_lang_list:
            self._downloader.report_warning('video doesn\'t have subtitles')
    def _get_ytplayer_config(self, webpage):
        # Try the stricter pattern first; fall back to the looser one.
            r';ytplayer\.config\s*=\s*({.*?});ytplayer',
            r';ytplayer\.config\s*=\s*({.*?});',
        for pattern in patterns:
            config = self._search_regex(pattern, webpage, 'ytconfig.player', default=None)
            if config is not None:
                # Returns the parsed player config dict, or falls through to
                # an implicit None when no pattern matches.
                return json.loads(uppercase_escape(config))
    def _get_automatic_captions(self, video_id, webpage):
        """We need the webpage for getting the captions url, pass it as an
        argument to speed up the process."""
        self.to_screen('%s: Looking for automatic captions' % video_id)
        player_config = self._get_ytplayer_config(webpage)
        err_msg = 'Couldn\'t find automatic captions for %s' % video_id
        if player_config is None:
            self._downloader.report_warning(err_msg)
            args = player_config['args']
            caption_url = args['ttsurl']
            timestamp = args['timestamp']
            # We get the available subtitles
            list_params = compat_urllib_parse.urlencode({
            list_url = caption_url + '&' + list_params
            caption_list = self._download_xml(list_url, video_id)
            # The first <track> is the original-language track the automatic
            # translations are derived from.
            original_lang_node = caption_list.find('track')
            if original_lang_node is None:
                self._downloader.report_warning('Video doesn\'t have automatic captions')
            original_lang = original_lang_node.attrib['lang_code']
            caption_kind = original_lang_node.attrib.get('kind', '')
            for lang_node in caption_list.findall('target'):
                sub_lang = lang_node.attrib['lang_code']
                for ext in ['sbv', 'vtt', 'srt']:
                    params = compat_urllib_parse.urlencode({
                        'lang': original_lang,
                        'kind': caption_kind,
                        'url': caption_url + '&' + params,
                sub_lang_list[sub_lang] = sub_formats
        # An extractor error can be raise by the download process if there are
        # no automatic captions but there are subtitles
        except (KeyError, ExtractorError):
            self._downloader.report_warning(err_msg)
    def extract_id(cls, url):
        # Re-apply _VALID_URL to pull out the 11-character video id
        # (group 2 of the pattern).
        mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
            raise ExtractorError('Invalid URL: %s' % url)
        video_id = mobj.group(2)
    def _extract_from_m3u8(self, manifest_url, video_id):
        # Map itag -> media URL by scraping the m3u8 formats manifest.
        def _get_urls(_manifest):
            # Keep only non-empty, non-comment (#...) lines of the playlist.
            lines = _manifest.split('\n')
            urls = filter(lambda l: l and not l.startswith('#'),
        manifest = self._download_webpage(manifest_url, video_id, 'Downloading formats manifest')
        formats_urls = _get_urls(manifest)
        for format_url in formats_urls:
            # The itag is embedded in the URL path as .../itag/<n>/...
            itag = self._search_regex(r'itag/(\d+?)/', format_url, 'itag')
            url_map[itag] = format_url
962 def _extract_annotations(self, video_id):
963 url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id
964 return self._download_webpage(url, video_id, note='Searching for annotations.', errnote='Unable to download video annotations.')
    def _parse_dash_manifest(
            self, video_id, dash_manifest_url, player_url, age_gate, fatal=True):
        # Manifest URLs may carry an encrypted signature as a /s/<sig> path
        # component; decrypt it and re-embed it as /signature/<sig>.
        def decrypt_sig(mobj):
            dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
            return '/signature/%s' % dec_s
        dash_manifest_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, dash_manifest_url)
        dash_doc = self._download_xml(
            dash_manifest_url, video_id,
            note='Downloading DASH manifest',
            errnote='Could not download DASH manifest',
        if dash_doc is False:

        for a in dash_doc.findall('.//{urn:mpeg:DASH:schema:MPD:2011}AdaptationSet'):
            mime_type = a.attrib.get('mimeType')
            for r in a.findall('{urn:mpeg:DASH:schema:MPD:2011}Representation'):
                url_el = r.find('{urn:mpeg:DASH:schema:MPD:2011}BaseURL')
                if mime_type == 'text/vtt':
                    # TODO implement WebVTT downloading
                elif mime_type.startswith('audio/') or mime_type.startswith('video/'):
                    segment_list = r.find('{urn:mpeg:DASH:schema:MPD:2011}SegmentList')
                    format_id = r.attrib['id']
                    video_url = url_el.text
                    filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength'))
                        'format_id': format_id,
                        'width': int_or_none(r.attrib.get('width')),
                        'height': int_or_none(r.attrib.get('height')),
                        'tbr': int_or_none(r.attrib.get('bandwidth'), 1000),
                        'asr': int_or_none(r.attrib.get('audioSamplingRate')),
                        'filesize': filesize,
                        'fps': int_or_none(r.attrib.get('frameRate')),
                    if segment_list is not None:
                            'initialization_url': segment_list.find('{urn:mpeg:DASH:schema:MPD:2011}Initialization').attrib['sourceURL'],
                            'segment_urls': [segment.attrib.get('media') for segment in segment_list.findall('{urn:mpeg:DASH:schema:MPD:2011}SegmentURL')],
                            'protocol': 'http_dash_segments',
                    # Merge with an already-collected format of the same id;
                    # otherwise seed defaults from the static _formats table.
                        existing_format = next(
                            fo for fo in formats
                            if fo['format_id'] == format_id)
                    except StopIteration:
                        full_info = self._formats.get(format_id, {}).copy()
                        codecs = r.attrib.get('codecs')
                            # The codecs attribute describes whichever stream
                            # type (audio or video) the representation carries.
                            if full_info.get('acodec') == 'none' and 'vcodec' not in full_info:
                                full_info['vcodec'] = codecs
                            elif full_info.get('vcodec') == 'none' and 'acodec' not in full_info:
                                full_info['acodec'] = codecs
                        formats.append(full_info)
                        existing_format.update(f)
                    self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
# Main single-video extraction entry point. Downloads the watch page,
# gathers video_info from the page and/or get_video_info endpoints,
# decrypts stream signatures, parses DASH manifests and assembles the
# final info dict. NOTE(review): this excerpt is heavily elided — many
# initialisations (proto, start_time/end_time, dash_mpds, formats, dct,
# entries) and several control-flow lines are missing; read alongside
# the full file.
1033 def _real_extract(self, url):
1034 url, smuggled_data = unsmuggle_url(url, {})
1037 'http' if self._downloader.params.get('prefer_insecure', False)
# Parse t/start/end time hints from both the fragment and the query.
1042 parsed_url = compat_urllib_parse_urlparse(url)
1043 for component in [parsed_url.fragment, parsed_url.query]:
1044 query = compat_parse_qs(component)
1045 if start_time is None and 't' in query:
1046 start_time = parse_duration(query['t'][0])
1047 if start_time is None and 'start' in query:
1048 start_time = parse_duration(query['start'][0])
1049 if end_time is None and 'end' in query:
1050 end_time = parse_duration(query['end'][0])
1052 # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1053 mobj = re.search(self._NEXT_URL_RE, url)
1055 url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
1056 video_id = self.extract_id(url)
# Normalise to a canonical watch URL; bpctr bypasses the content-rating
# interstitial.
1059 url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
1060 video_webpage = self._download_webpage(url, video_id)
1062 # Attempt to extract SWF player URL
1063 mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
1064 if mobj is not None:
1065 player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
# Collect candidate DASH manifest URLs from every video_info source,
# deduplicating as they are found.
1071 def add_dash_mpd(video_info):
1072 dash_mpd = video_info.get('dashmpd')
1073 if dash_mpd and dash_mpd[0] not in dash_mpds:
1074 dash_mpds.append(dash_mpd[0])
1077 embed_webpage = None
# Age-gated videos: fetch the embed page and query get_video_info with
# the embed page's 'sts' value, which works without login.
1079 if re.search(r'player-age-gate-content">', video_webpage) is not None:
1081 # We simulate the access to the video from www.youtube.com/v/{video_id}
1082 # this can be viewed without login into Youtube
1083 url = proto + '://www.youtube.com/embed/%s' % video_id
1084 embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
1085 data = compat_urllib_parse.urlencode({
1086 'video_id': video_id,
1087 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1088 'sts': self._search_regex(
1089 r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
1091 video_info_url = proto + '://www.youtube.com/get_video_info?' + data
1092 video_info_webpage = self._download_webpage(
1093 video_info_url, video_id,
1094 note='Refetching age-gated info webpage',
1095 errnote='unable to download video info webpage')
1096 video_info = compat_parse_qs(video_info_webpage)
1097 add_dash_mpd(video_info)
1101 # Try looking directly into the video webpage
1102 ytplayer_config = self._get_ytplayer_config(video_webpage)
1103 if ytplayer_config is not None:
1104 args = ytplayer_config['args']
1105 if args.get('url_encoded_fmt_stream_map'):
1106 # Convert to the same format returned by compat_parse_qs
1107 video_info = dict((k, [v]) for k, v in args.items())
1108 add_dash_mpd(video_info)
# NOTE(review): 'livestream' compares against the string '1' but
# 'live_playback' against the int 1 — presumably matching how each key
# appears in the player config; confirm before normalising.
1109 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1111 if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
1112 # We also try looking in get_video_info since it may contain different dashmpd
1113 # URL that points to a DASH manifest with possibly different itag set (some itags
1114 # are missing from DASH manifest pointed by webpage's dashmpd, some - from DASH
1115 # manifest pointed by get_video_info's dashmpd).
1116 # The general idea is to take a union of itags of both DASH manifests (for example
1117 # video with such 'manifest behavior' see https://github.com/rg3/youtube-dl/issues/6093)
1118 self.report_video_info_webpage_download(video_id)
1119 for el_type in ['&el=info', '&el=embedded', '&el=detailpage', '&el=vevo', '']:
1121 '%s://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
1122 % (proto, video_id, el_type))
1123 video_info_webpage = self._download_webpage(
1125 video_id, note=False,
1126 errnote='unable to download video info webpage')
1127 get_video_info = compat_parse_qs(video_info_webpage)
1128 if get_video_info.get('use_cipher_signature') != ['True']:
1129 add_dash_mpd(get_video_info)
1131 video_info = get_video_info
1132 if 'token' in get_video_info:
1133 # Different get_video_info requests may report different results, e.g.
1134 # some may report video unavailability, but some may serve it without
1135 # any complaint (see https://github.com/rg3/youtube-dl/issues/7362,
1136 # the original webpage as well as el=info and el=embedded get_video_info
1137 # requests report video unavailability due to geo restriction while
1138 # el=detailpage succeeds and returns valid data). This is probably
1139 # due to YouTube measures against IP ranges of hosting providers.
1140 # Working around by preferring the first succeeded video_info containing
1141 # the token if no such video_info yet was found.
1142 if 'token' not in video_info:
1143 video_info = get_video_info
# No usable video_info at all: surface the site-provided reason when
# available, with a special message for region restriction.
1145 if 'token' not in video_info:
1146 if 'reason' in video_info:
1147 if 'The uploader has not made this video available in your country.' in video_info['reason']:
1148 regions_allowed = self._html_search_meta('regionsAllowed', video_webpage, default=None)
1150 raise ExtractorError('YouTube said: This video is available in %s only' % (
1151 ', '.join(map(ISO3166Utils.short2full, regions_allowed.split(',')))),
1153 raise ExtractorError(
1154 'YouTube said: %s' % video_info['reason'][0],
1155 expected=True, video_id=video_id)
1157 raise ExtractorError(
1158 '"token" parameter not in video info for unknown reason',
# Title: missing titles are tolerated with a warning.
1162 if 'title' in video_info:
1163 video_title = video_info['title'][0]
1165 self._downloader.report_warning('Unable to extract video title')
# Description: prefer the full element, de-obfuscating YouTube's
# redirect links back into plain hrefs; fall back to the meta tag.
1169 video_description = get_element_by_id("eow-description", video_webpage)
1170 if video_description:
1171 video_description = re.sub(r'''(?x)
1173 (?:[a-zA-Z-]+="[^"]+"\s+)*?
1175 (?:[a-zA-Z-]+="[^"]+"\s+)*?
1176 class="yt-uix-redirect-link"\s*>
1179 ''', r'\1', video_description)
1180 video_description = clean_html(video_description)
1182 fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
1184 video_description = unescapeHTML(fd_mobj.group(1))
1186 video_description = ''
# Multi-camera live events expose sibling feeds; expand them into a
# playlist unless --no-playlist or a smuggled force_singlefeed is set.
1188 if 'multifeed_metadata_list' in video_info and not smuggled_data.get('force_singlefeed', False):
1189 if not self._downloader.params.get('noplaylist'):
1192 multifeed_metadata_list = compat_urllib_parse_unquote_plus(video_info['multifeed_metadata_list'][0])
1193 for feed in multifeed_metadata_list.split(','):
1194 feed_data = compat_parse_qs(feed)
1196 '_type': 'url_transparent',
1197 'ie_key': 'Youtube',
1199 '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
1200 {'force_singlefeed': True}),
1201 'title': '%s (%s)' % (video_title, feed_data['title'][0]),
1203 feed_ids.append(feed_data['id'][0])
1205 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
1206 % (', '.join(feed_ids), video_id))
1207 return self.playlist_result(entries, video_id, video_title, video_description)
1208 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
1210 if 'view_count' in video_info:
1211 view_count = int(video_info['view_count'][0])
1215 # Check for "rental" videos
1216 if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
1217 raise ExtractorError('"rental" videos not supported')
1219 # Start extracting information
1220 self.report_information_extraction(video_id)
# Uploader name is mandatory; the id is best-effort from the page.
1223 if 'author' not in video_info:
1224 raise ExtractorError('Unable to extract uploader name')
1225 video_uploader = compat_urllib_parse_unquote_plus(video_info['author'][0])
1228 video_uploader_id = None
1229 mobj = re.search(r'<link itemprop="url" href="http://www.youtube.com/(?:user|channel)/([^"]+)">', video_webpage)
1230 if mobj is not None:
1231 video_uploader_id = mobj.group(1)
1233 self._downloader.report_warning('unable to extract uploader nickname')
1236 # We try first to get a high quality image:
1237 m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
1238 video_webpage, re.DOTALL)
1239 if m_thumb is not None:
1240 video_thumbnail = m_thumb.group(1)
1241 elif 'thumbnail_url' not in video_info:
1242 self._downloader.report_warning('unable to extract video thumbnail')
1243 video_thumbnail = None
1244 else: # don't panic if we can't find it
1245 video_thumbnail = compat_urllib_parse_unquote_plus(video_info['thumbnail_url'][0])
# Upload date: meta tag first, then two page-layout fallbacks, finally
# normalised by unified_strdate.
1248 upload_date = self._html_search_meta(
1249 'datePublished', video_webpage, 'upload date', default=None)
1251 upload_date = self._search_regex(
1252 [r'(?s)id="eow-date.*?>(.*?)</span>',
1253 r'id="watch-uploader-info".*?>.*?(?:Published|Uploaded|Streamed live|Started) on (.+?)</strong>'],
1254 video_webpage, 'upload date', default=None)
1256 upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
1257 upload_date = unified_strdate(upload_date)
# Category: single category scraped from the watch-page sidebar.
1259 m_cat_container = self._search_regex(
1260 r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
1261 video_webpage, 'categories', default=None)
1263 category = self._html_search_regex(
1264 r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
1266 video_categories = None if category is None else [category]
1268 video_categories = None
1271 unescapeHTML(m.group('content'))
1272 for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
# Like/dislike counts from the sentiment buttons; None when hidden.
1274 def _extract_count(count_name):
1275 return str_to_int(self._search_regex(
1276 r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>'
1277 % re.escape(count_name),
1278 video_webpage, count_name, default=None))
1280 like_count = _extract_count('like')
1281 dislike_count = _extract_count('dislike')
# Subtitles and auto-generated captions via the mixin helpers.
1284 video_subtitles = self.extract_subtitles(video_id, video_webpage)
1285 automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
1287 if 'length_seconds' not in video_info:
1288 self._downloader.report_warning('unable to extract video duration')
1289 video_duration = None
1291 video_duration = int(compat_urllib_parse_unquote_plus(video_info['length_seconds'][0]))
1294 video_annotations = None
1295 if self._downloader.params.get('writeannotations', False):
1296 video_annotations = self._extract_annotations(video_id)
# Turn an itag -> URL map (e.g. from the HLS manifest) into format
# dicts, enriching from the static self._formats table.
1298 def _map_to_format_list(urlmap):
1300 for itag, video_real_url in urlmap.items():
1303 'url': video_real_url,
1304 'player_url': player_url,
1306 if itag in self._formats:
1307 dct.update(self._formats[itag])
# Three mutually exclusive stream sources: raw RTMP, the encoded format
# stream maps (regular videos), or an HLS manifest ('hlsvp', live).
1311 if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1312 self.report_rtmp_download()
1314 'format_id': '_rtmp',
1316 'url': video_info['conn'][0],
1317 'player_url': player_url,
1319 elif len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1:
1320 encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
1321 if 'rtmpe%3Dyes' in encoded_url_map:
1322 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
1324 for url_data_str in encoded_url_map.split(','):
1325 url_data = compat_parse_qs(url_data_str)
1326 if 'itag' not in url_data or 'url' not in url_data:
1328 format_id = url_data['itag'][0]
1329 url = url_data['url'][0]
# Signature handling: plain 'sig' is appended directly; encrypted 's'
# needs the JS/SWF player to be located and the cipher reversed.
1331 if 'sig' in url_data:
1332 url += '&signature=' + url_data['sig'][0]
1333 elif 's' in url_data:
1334 encrypted_sig = url_data['s'][0]
1335 ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
1337 jsplayer_url_json = self._search_regex(
1339 embed_webpage if age_gate else video_webpage,
1340 'JS player URL (1)', default=None)
1341 if not jsplayer_url_json and not age_gate:
1342 # We need the embed website after all
1343 if embed_webpage is None:
1344 embed_url = proto + '://www.youtube.com/embed/%s' % video_id
1345 embed_webpage = self._download_webpage(
1346 embed_url, video_id, 'Downloading embed webpage')
1347 jsplayer_url_json = self._search_regex(
1348 ASSETS_RE, embed_webpage, 'JS player URL')
1350 player_url = json.loads(jsplayer_url_json)
1351 if player_url is None:
1352 player_url_json = self._search_regex(
1353 r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
1354 video_webpage, 'age gate player URL')
1355 player_url = json.loads(player_url_json)
# Verbose mode: report which player build the signature came from,
# to make cipher-breakage bug reports actionable.
1357 if self._downloader.params.get('verbose'):
1358 if player_url is None:
1359 player_version = 'unknown'
1360 player_desc = 'unknown'
1362 if player_url.endswith('swf'):
1363 player_version = self._search_regex(
1364 r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
1365 'flash player', fatal=False)
1366 player_desc = 'flash player %s' % player_version
1368 player_version = self._search_regex(
1369 [r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js', r'(?:www|player)-([^/]+)/base\.js'],
1371 'html5 player', fatal=False)
1372 player_desc = 'html5 player %s' % player_version
1374 parts_sizes = self._signature_cache_id(encrypted_sig)
1375 self.to_screen('{%s} signature length %s, %s' %
1376 (format_id, parts_sizes, player_desc))
1378 signature = self._decrypt_signature(
1379 encrypted_sig, video_id, player_url, age_gate)
1380 url += '&signature=' + signature
1381 if 'ratebypass' not in url:
1382 url += '&ratebypass=yes'
1384 # Some itags are not included in DASH manifest thus corresponding formats will
1385 # lack metadata (see https://github.com/rg3/youtube-dl/pull/5993).
1386 # Trying to extract metadata from url_encoded_fmt_stream_map entry.
1387 mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
1388 width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
1390 'format_id': format_id,
1392 'player_url': player_url,
1393 'filesize': int_or_none(url_data.get('clen', [None])[0]),
1394 'tbr': float_or_none(url_data.get('bitrate', [None])[0], 1000),
1397 'fps': int_or_none(url_data.get('fps', [None])[0]),
1398 'format_note': url_data.get('quality_label', [None])[0] or url_data.get('quality', [None])[0],
# Parse the MIME 'type' field (e.g. video/mp4; codecs="...") into
# container extension and audio/video codec assignments.
1400 type_ = url_data.get('type', [None])[0]
1402 type_split = type_.split(';')
1403 kind_ext = type_split[0].split('/')
1404 if len(kind_ext) == 2:
1405 kind, ext = kind_ext
1407 if kind in ('audio', 'video'):
1409 for mobj in re.finditer(
1410 r'(?P<key>[a-zA-Z_-]+)=(?P<quote>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):
1411 if mobj.group('key') == 'codecs':
1412 codecs = mobj.group('val')
1415 codecs = codecs.split(',')
1416 if len(codecs) == 2:
1417 acodec, vcodec = codecs[0], codecs[1]
# Single codec: whichever side matches 'kind' gets it, other is 'none'.
1419 acodec, vcodec = (codecs[0], 'none') if kind == 'audio' else ('none', codecs[0])
1424 if format_id in self._formats:
1425 dct.update(self._formats[format_id])
1427 elif video_info.get('hlsvp'):
1428 manifest_url = video_info['hlsvp'][0]
1429 url_map = self._extract_from_m3u8(manifest_url, video_id)
1430 formats = _map_to_format_list(url_map)
1432 raise ExtractorError('no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
1434 # Look for the DASH manifest
1435 if self._downloader.params.get('youtube_include_dash_manifest', True):
1436 dash_mpd_fatal = True
1437 for dash_manifest_url in dash_mpds:
1440 for df in self._parse_dash_manifest(
1441 video_id, dash_manifest_url, player_url, age_gate, dash_mpd_fatal):
1442 # Do not overwrite DASH format found in some previous DASH manifest
1443 if df['format_id'] not in dash_formats:
1444 dash_formats[df['format_id']] = df
1445 # Additional DASH manifests may end up in HTTP Error 403 therefore
1446 # allow them to fail without bug report message if we already have
1447 # some DASH manifest succeeded. This is temporary workaround to reduce
1448 # burst of bug reports until we figure out the reason and whether it
1449 # can be fixed at all.
1450 dash_mpd_fatal = False
1451 except (ExtractorError, KeyError) as e:
1452 self.report_warning(
1453 'Skipping DASH manifest: %r' % e, video_id)
1455 # Remove the formats we found through non-DASH, they
1456 # contain less info and it can be wrong, because we use
1457 # fixed values (for example the resolution). See
1458 # https://github.com/rg3/youtube-dl/issues/5774 for an
1460 formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
1461 formats.extend(dash_formats.values())
1463 # Check for malformed aspect ratio
1464 stretched_m = re.search(
1465 r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
1468 ratio = float(stretched_m.group('w')) / float(stretched_m.group('h'))
1470 if f.get('vcodec') != 'none':
1471 f['stretched_ratio'] = ratio
1473 self._sort_formats(formats)
# Assemble the final info dict returned to the downloader core.
1477 'uploader': video_uploader,
1478 'uploader_id': video_uploader_id,
1479 'upload_date': upload_date,
1480 'title': video_title,
1481 'thumbnail': video_thumbnail,
1482 'description': video_description,
1483 'categories': video_categories,
1485 'subtitles': video_subtitles,
1486 'automatic_captions': automatic_captions,
1487 'duration': video_duration,
1488 'age_limit': 18 if age_gate else 0,
1489 'annotations': video_annotations,
1490 'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
1491 'view_count': view_count,
1492 'like_count': like_count,
1493 'dislike_count': dislike_count,
1494 'average_rating': float_or_none(video_info.get('avg_rating', [None])[0]),
1497 'start_time': start_time,
1498 'end_time': end_time,
# Extractor for regular YouTube playlists (including mixes, which are
# delegated to a dedicated extraction path). NOTE(review): this excerpt
# is elided — parts of _VALID_URL (its closing quotes included), the
# _TESTS entries and several statements are missing.
1502 class YoutubePlaylistIE(YoutubeBaseInfoExtractor, YoutubePlaylistBaseInfoExtractor):
1503 IE_DESC = 'YouTube.com playlists'
1504 _VALID_URL = r"""(?x)(?:
1509 (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/videoseries)
1510 \? (?:.*?&)*? (?:p|a|list)=
1514 (?:PL|LL|EC|UU|FL|RD|UL)?[0-9A-Za-z-_]{10,}
1515 # Top tracks, they can also include dots
1520 ((?:PL|LL|EC|UU|FL|RD|UL)[0-9A-Za-z-_]{10,})
# Canonical playlist page; _VIDEO_RE picks id/index/title out of each
# video link on it.
1522 _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
1523 _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&[^"]*?index=(?P<index>\d+)(?:[^>]+>(?P<title>[^<]+))?'
1524 IE_NAME = 'youtube:playlist'
1526 'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
1528 'title': 'ytdl test PL',
1529 'id': 'PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
1531 'playlist_count': 3,
1533 'url': 'https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
1535 'id': 'PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
1536 'title': 'YDL_Empty_List',
1538 'playlist_count': 0,
1540 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
1541 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
1543 'title': '29C3: Not my department',
1544 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
1546 'playlist_count': 95,
1548 'note': 'issue #673',
1549 'url': 'PLBB231211A4F62143',
1551 'title': '[OLD]Team Fortress 2 (Class-based LP)',
1552 'id': 'PLBB231211A4F62143',
1554 'playlist_mincount': 26,
1556 'note': 'Large playlist',
1557 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
1559 'title': 'Uploads from Cauchemar',
1560 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
1562 'playlist_mincount': 799,
1564 'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
1566 'title': 'YDL_safe_search',
1567 'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
1569 'playlist_count': 2,
1572 'url': 'http://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
1573 'playlist_count': 4,
1576 'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
1579 'note': 'Embedded SWF player',
1580 'url': 'http://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
1581 'playlist_count': 4,
1584 'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ',
1587 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
1588 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
1590 'title': 'Uploads from Interstellar Movie',
1591 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
1593 'playlist_mincout': 21,
# NOTE(review): body elided — presumably performs login; confirm.
1596 def _real_initialize(self):
# Mixes have no dedicated page; extraction works off a watch page for
# one of the mix's videos.
1599 def _extract_mix(self, playlist_id):
1600 # The mixes are generated from a single video
1601 # the id of the playlist is just 'RD' + video_id
1602 url = 'https://youtube.com/watch?v=%s&list=%s' % (playlist_id[-11:], playlist_id)
1603 webpage = self._download_webpage(
1604 url, playlist_id, 'Downloading Youtube mix')
1605 search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage)
1607 search_title('playlist-title') or
1608 search_title('title long-title') or
1609 search_title('title'))
1610 title = clean_html(title_span)
1611 ids = orderedSet(re.findall(
1612 r'''(?xs)data-video-username=".*?".*?
1613 href="/watch\?v=([0-9A-Za-z_-]{11})&[^"]*?list=%s''' % re.escape(playlist_id),
1615 url_results = self._ids_to_results(ids)
1617 return self.playlist_result(url_results, playlist_id, title)
# Regular playlists: download the playlist page, surface any site
# alerts (missing/private playlist, bad params), then paginate.
1619 def _extract_playlist(self, playlist_id):
1620 url = self._TEMPLATE_URL % playlist_id
1621 page = self._download_webpage(url, playlist_id)
1623 for match in re.findall(r'<div class="yt-alert-message">([^<]+)</div>', page):
1624 match = match.strip()
1625 # Check if the playlist exists or is private
1626 if re.match(r'[^<]*(The|This) playlist (does not exist|is private)[^<]*', match):
1627 raise ExtractorError(
1628 'The playlist doesn\'t exist or is private, use --username or '
1629 '--netrc to access it.',
1631 elif re.match(r'[^<]*Invalid parameters[^<]*', match):
1632 raise ExtractorError(
1633 'Invalid parameters. Maybe URL is incorrect.',
1635 elif re.match(r'[^<]*Choose your language[^<]*', match):
# Unrecognised alerts are reported but do not abort extraction.
1638 self.report_warning('Youtube gives an alert message: ' + match)
1640 playlist_title = self._html_search_regex(
1641 r'(?s)<h1 class="pl-header-title[^"]*">\s*(.*?)\s*</h1>',
1644 return self.playlist_result(self._entries(page, playlist_id), playlist_id, playlist_title)
1646 def _real_extract(self, url):
1647 # Extract playlist id
1648 mobj = re.match(self._VALID_URL, url)
1650 raise ExtractorError('Invalid URL: %s' % url)
1651 playlist_id = mobj.group(1) or mobj.group(2)
1653 # Check if it's a video-specific URL
1654 query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
1655 if 'v' in query_dict:
1656 video_id = query_dict['v'][0]
1657 if self._downloader.params.get('noplaylist'):
1658 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
1659 return self.url_result(video_id, 'Youtube', video_id=video_id)
1661 self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
# RD (mix) and UL ids cannot be fetched as normal playlist pages.
1663 if playlist_id.startswith('RD') or playlist_id.startswith('UL'):
1664 # Mixes require a custom extraction process
1665 return self._extract_mix(playlist_id)
1667 return self._extract_playlist(playlist_id)
# Extractor for channel pages; prefers converting a channel to its
# auto-generated uploads playlist (UU...) to avoid the 1050-video page
# listing cap. NOTE(review): excerpt is elided (test dict pieces, the
# autogenerated branch's entries list, blank lines).
1670 class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
1671 IE_DESC = 'YouTube.com channels'
1672 _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/(?P<id>[0-9A-Za-z_-]+)'
1673 _TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos'
1674 _VIDEO_RE = r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?'
1675 IE_NAME = 'youtube:channel'
1677 'note': 'paginated channel',
1678 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
1679 'playlist_mincount': 91,
1681 'id': 'UUKfVa3S1e4PHvxWcwyMMg8w',
1682 'title': 'Uploads from lex will',
1685 'note': 'Age restricted channel',
1686 # from https://www.youtube.com/user/DeusExOfficial
1687 'url': 'https://www.youtube.com/channel/UCs0ifCMCm1icqRbqhUINa0w',
1688 'playlist_mincount': 64,
1690 'id': 'UUs0ifCMCm1icqRbqhUINa0w',
1691 'title': 'Uploads from Deus Ex',
1695 def _real_extract(self, url):
1696 channel_id = self._match_id(url)
1698 url = self._TEMPLATE_URL % channel_id
1700 # Channel by page listing is restricted to 35 pages of 30 items, i.e. 1050 videos total (see #5778)
1701 # Workaround by extracting as a playlist if managed to obtain channel playlist URL
1702 # otherwise fallback on channel by page extraction
1703 channel_page = self._download_webpage(
1704 url + '?view=57', channel_id,
1705 'Downloading channel page', fatal=False)
1706 if channel_page is False:
1707 channel_playlist_id = False
# Channel id from meta tag first, then data attributes as fallback.
1709 channel_playlist_id = self._html_search_meta(
1710 'channelId', channel_page, 'channel id', default=None)
1711 if not channel_playlist_id:
1712 channel_playlist_id = self._search_regex(
1713 r'data-(?:channel-external-|yt)id="([^"]+)"',
1714 channel_page, 'channel id', default=None)
# A channel id 'UCxxx' maps to its uploads playlist 'UUxxx'.
1715 if channel_playlist_id and channel_playlist_id.startswith('UC'):
1716 playlist_id = 'UU' + channel_playlist_id[2:]
1717 return self.url_result(
1718 compat_urlparse.urljoin(url, '/playlist?list=%s' % playlist_id), 'YoutubePlaylist')
# Fallback: paginate the channel's /videos page directly.
1720 channel_page = self._download_webpage(url, channel_id, 'Downloading page #1')
1721 autogenerated = re.search(r'''(?x)
1723 channel-header-autogenerated-label|
1724 yt-channel-title-autogenerated
1725 )[^"]*"''', channel_page) is not None
1728 # The videos are contained in a single page
1729 # the ajax pages can't be used, they are empty
1732 video_id, 'Youtube', video_id=video_id,
1733 video_title=video_title)
1734 for video_id, video_title in self.extract_videos_from_page(channel_page)]
1735 return self.playlist_result(entries, channel_id)
1737 return self.playlist_result(self._entries(channel_page, channel_id), channel_id)
# Extractor for user pages; reuses the channel machinery with a user
# URL template. NOTE(review): the @classmethod decorator for suitable()
# and the `return False` inside its if-branch are elided here.
1740 class YoutubeUserIE(YoutubeChannelIE):
1741 IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
1742 _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
1743 _TEMPLATE_URL = 'https://www.youtube.com/user/%s/videos'
1744 IE_NAME = 'youtube:user'
1747 'url': 'https://www.youtube.com/user/TheLinuxFoundation',
1748 'playlist_mincount': 320,
1750 'title': 'TheLinuxFoundation',
1753 'url': 'ytuser:phihag',
1754 'only_matching': True,
# Defer to any other youtube extractor that claims the URL, since this
# extractor's regex is intentionally loose.
1758 def suitable(cls, url):
1759 # Don't return True if the url can be extracted with other youtube
1760 # extractor, the regex would is too permissive and it would match.
1761 other_ies = iter(klass for (name, klass) in globals().items() if name.endswith('IE') and klass is not cls)
1762 if any(ie.suitable(url) for ie in other_ies):
1765 return super(YoutubeUserIE, cls).suitable(url)
# Search extractor for the 'ytsearch' keyword; pages through the AJAX
# results endpoint. NOTE(review): elided lines include the `videos`/
# `limit` initialisation in _get_n_results and parts of the query dict.
1768 class YoutubeSearchIE(SearchInfoExtractor, YoutubePlaylistIE):
1769 IE_DESC = 'YouTube.com searches'
1770 # there doesn't appear to be a real limit, for example if you search for
1771 # 'python' you get more than 8.000.000 results
1772 _MAX_RESULTS = float('inf')
1773 IE_NAME = 'youtube:search'
1774 _SEARCH_KEY = 'ytsearch'
# Subclasses (e.g. the date-ordered variant) add extra query params here.
1775 _EXTRA_QUERY_ARGS = {}
1778 def _get_n_results(self, query, n):
1779 """Get a specified number of results for a query"""
# Fetch result pages until no new videos appear or the limit is hit.
1784 for pagenum in itertools.count(1):
1786 'search_query': query.encode('utf-8'),
1790 url_query.update(self._EXTRA_QUERY_ARGS)
1791 result_url = 'https://www.youtube.com/results?' + compat_urllib_parse.urlencode(url_query)
1792 data = self._download_json(
1793 result_url, video_id='query "%s"' % query,
1794 note='Downloading page %s' % pagenum,
1795 errnote='Unable to download API page')
1796 html_content = data[1]['body']['content']
# The results endpoint embeds a "search-message" block when the query
# produced nothing.
1798 if 'class="search-message' in html_content:
1799 raise ExtractorError(
1800 '[youtube] No video results', expected=True)
1802 new_videos = self._ids_to_results(orderedSet(re.findall(
1803 r'href="/watch\?v=(.{11})', html_content)))
1804 videos += new_videos
1805 if not new_videos or len(videos) > limit:
1810 return self.playlist_result(videos, query)
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Search extractor variant that orders results newest-first."""
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    IE_DESC = 'YouTube.com searches, newest videos first'
    _SEARCH_KEY = 'ytsearchdate'
    # Extra parameter understood by the results endpoint: sort by upload date.
    _EXTRA_QUERY_ARGS = {'search_sort': 'video_date_uploaded'}
# Extractor for search-results URLs: scrapes the rendered results page
# into a playlist of watch URLs. NOTE(review): excerpt is elided (test
# dict pieces, the entries accumulator, the returned playlist dict).
1820 class YoutubeSearchURLIE(InfoExtractor):
1821 IE_DESC = 'YouTube.com search URLs'
1822 IE_NAME = 'youtube:search_url'
1823 _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?search_query=(?P<query>[^&]+)(?:[&]|$)'
1825 'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
1826 'playlist_mincount': 5,
1828 'title': 'youtube-dl test video',
1832 def _real_extract(self, url):
1833 mobj = re.match(self._VALID_URL, url)
1834 query = compat_urllib_parse_unquote_plus(mobj.group('query'))
1836 webpage = self._download_webpage(url, query)
# Narrow to the <ol class="item-section"> results container first, then
# split it into one snippet per result title block.
1837 result_code = self._search_regex(
1838 r'(?s)<ol[^>]+class="item-section"(.*?)</ol>', webpage, 'result HTML')
1840 part_codes = re.findall(
1841 r'(?s)<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*>(.*?)</h3>', result_code)
1843 for part_code in part_codes:
1844 part_title = self._html_search_regex(
1845 [r'(?s)title="([^"]+)"', r'>([^<]+)</a>'], part_code, 'item title', fatal=False)
1846 part_url_snippet = self._html_search_regex(
1847 r'(?s)href="([^"]+)"', part_code, 'item URL')
# hrefs are site-relative; resolve against the site root.
1848 part_url = compat_urlparse.urljoin(
1849 'https://www.youtube.com/', part_url_snippet)
1853 'title': part_title,
1857 '_type': 'playlist',
# Extractor for multi-season show pages: one playlist per season,
# returned together as a playlist of playlists. NOTE(review): excerpt
# elided (test dict pieces, the entries list, the returned dict body).
1863 class YoutubeShowIE(InfoExtractor):
1864 IE_DESC = 'YouTube.com (multi-season) shows'
1865 _VALID_URL = r'https?://www\.youtube\.com/show/(?P<id>[^?#]*)'
1866 IE_NAME = 'youtube:show'
1868 'url': 'https://www.youtube.com/show/airdisasters',
1869 'playlist_mincount': 5,
1871 'id': 'airdisasters',
1872 'title': 'Air Disasters',
1876 def _real_extract(self, url):
1877 mobj = re.match(self._VALID_URL, url)
1878 playlist_id = mobj.group('id')
1879 webpage = self._download_webpage(
1880 'https://www.youtube.com/show/%s/playlists' % playlist_id, playlist_id, 'Downloading show webpage')
1881 # There's one playlist for each season of the show
1882 m_seasons = list(re.finditer(r'href="(/playlist\?list=.*?)"', webpage))
1883 self.to_screen('%s: Found %s seasons' % (playlist_id, len(m_seasons)))
# Each season link is delegated to YoutubePlaylistIE.
1886 'https://www.youtube.com' + season.group(1), 'YoutubePlaylist')
1887 for season in m_seasons
1889 title = self._og_search_title(webpage, fatal=False)
1892 '_type': 'playlist',
# NOTE(review): the class docstring's triple quotes are elided in this
# excerpt, as are the IE_NAME @property decorator/def lines and the
# bodies of _real_initialize and the ids accumulator.
1899 class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
1901 Base class for feed extractors
1902 Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
# All feed pages are personal, so credentials are mandatory.
1904 _LOGIN_REQUIRED = True
# (Property body, decorator elided:) derive IE_NAME from the feed name.
1908 return 'youtube:%s' % self._FEED_NAME
1910 def _real_initialize(self):
1913 def _real_extract(self, url):
1914 page = self._download_webpage(
1915 'https://www.youtube.com/feed/%s' % self._FEED_NAME, self._PLAYLIST_TITLE)
1917 # The extraction process is the same as for playlists, but the regex
1918 # for the video ids doesn't contain an index
1920 more_widget_html = content_html = page
# Follow the "load more" widget page by page, accumulating video ids.
1921 for page_num in itertools.count(1):
1922 matches = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)
1924 # 'recommended' feed has infinite 'load more' and each new portion spins
1925 # the same videos in (sometimes) slightly different order, so we'll check
1926 # for unicity and break when portion has no new videos
1927 new_ids = filter(lambda video_id: video_id not in ids, orderedSet(matches))
1933 mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
1937 more = self._download_json(
1938 'https://youtube.com/%s' % mobj.group('more'), self._PLAYLIST_TITLE,
1939 'Downloading page #%s' % page_num,
1940 transform_source=uppercase_escape)
1941 content_html = more['content_html']
1942 more_widget_html = more['load_more_widget_html']
1944 return self.playlist_result(
1945 self._ids_to_results(ids), playlist_title=self._PLAYLIST_TITLE)
class YoutubeWatchLaterIE(YoutubePlaylistIE):
    """Extractor for the authenticated user's "Watch Later" list.

    Watch Later is simply the reserved playlist with id ``WL``, so all of
    the real work is delegated to the machinery inherited from
    YoutubePlaylistIE.
    """
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/(?:feed/watch_later|playlist\?list=WL)|:ytwatchlater'

    # The inherited playlist tests don't apply to this private list.
    _TESTS = []

    def _real_extract(self, url):
        # Regardless of how the URL was spelled, the target is playlist 'WL'.
        return self._extract_playlist('WL')
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Resolve the logged-in user's favourites to its backing playlist."""
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
    _LOGIN_REQUIRED = True

    def _real_extract(self, url):
        # The favourites page embeds the id of the ordinary playlist that
        # actually holds the videos; hand that id to YoutubePlaylistIE.
        page = self._download_webpage(
            'https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        favourites_list_id = self._search_regex(
            r'list=(.+?)["&]', page, 'favourites playlist id')
        return self.url_result(favourites_list_id, 'YoutubePlaylist')
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """":ytrec" — the personalized recommended-videos feed (login required)."""
    # Feed endpoint and displayed playlist title; the base class does the work.
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = 'Youtube Recommended videos'
    _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?'
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """":ytsubs" — the authenticated user's subscriptions feed."""
    # Feed endpoint and displayed playlist title; the base class does the work.
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = 'Youtube Subscriptions'
    _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
    IE_DESC = 'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """":ythistory" — the authenticated user's watch-history feed."""
    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
    # Raw string: every sibling extractor declares _VALID_URL as r'...', and
    # '\.' in a plain string literal is an invalid escape sequence (deprecated
    # since Python 3.6, a future SyntaxError). The matched pattern is unchanged.
    _VALID_URL = r'https?://www\.youtube\.com/feed/history|:ythistory'
    _FEED_NAME = 'history'
    _PLAYLIST_TITLE = 'Youtube History'
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch watch URLs whose ?v= parameter was lost (typically an unquoted
    '&' in the shell) and fail with a helpful error message instead of a
    confusing extraction failure."""
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    # Verbose regex; several of its alternatives and the closing quotes are
    # on lines not visible in this chunk.
    _VALID_URL = r'''(?x)
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
            annotation_id=annotation_[^&]+|
            attribution_link\?a=[^&]+
    # NOTE(review): _TESTS list/dict delimiters are on lines not shown here;
    # all cases are URL-matching checks only ('only_matching': True).
        'url': 'http://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
        'url': 'http://www.youtube.com/watch?',
        'only_matching': True,
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,

    def _real_extract(self, url):
        # Always an error: there is nothing to extract from a truncated URL.
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            '"http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply youtube-dl BaW_jenozKc .',
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch watch URLs whose video id is shorter than the required 11
    characters and fail with an explicit "looks truncated" error."""
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    # Ids of 1-10 chars only; full 11-char ids are handled by the regular IE.
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'
    # NOTE(review): _TESTS list/dict delimiters are on lines not shown here.
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,

    def _real_extract(self, url):
        video_id = self._match_id(url)
        # Always an error — the call's closing argument is on a line past
        # the end of this chunk.
        raise ExtractorError(
            'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url),