3 from __future__ import unicode_literals
13 from .common import InfoExtractor, SearchInfoExtractor
14 from ..jsinterp import JSInterpreter
15 from ..swfinterp import SWFInterpreter
16 from ..compat import (
20 compat_urllib_parse_unquote,
21 compat_urllib_parse_unquote_plus,
22 compat_urllib_parse_urlparse,
23 compat_urllib_request,
31 get_element_by_attribute,
47 class YoutubeBaseInfoExtractor(InfoExtractor):
48 """Provide base functions for Youtube extractors"""
49 _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
50 _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'
51 _NETRC_MACHINE = 'youtube'
52 # If True it will raise an error if no login info is provided
53 _LOGIN_REQUIRED = False
55 def _set_language(self):
57 '.youtube.com', 'PREF', 'f1=50000000&hl=en',
58 # YouTube sets the expire time to about two months
59 expire_time=time.time() + 2 * 30 * 24 * 3600)
61 def _ids_to_results(self, ids):
63 self.url_result(vid_id, 'Youtube', video_id=vid_id)
68 Attempt to log in to YouTube.
69 True is returned if successful or skipped.
70 False is returned if login failed.
72 If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
74 (username, password) = self._get_login_info()
75 # No authentication to be performed
77 if self._LOGIN_REQUIRED:
78 raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
81 login_page = self._download_webpage(
82 self._LOGIN_URL, None,
83 note='Downloading login page',
84 errnote='unable to fetch login page', fatal=False)
85 if login_page is False:
88 galx = self._search_regex(r'(?s)<input.+?name="GALX".+?value="(.+?)"',
89 login_page, 'Login GALX parameter')
93 'continue': 'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
98 'PersistentCookie': 'yes',
100 'bgresponse': 'js_disabled',
101 'checkConnection': '',
102 'checkedDomains': 'youtube',
109 'service': 'youtube',
114 # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
116 login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k, v in login_form_strs.items())
117 login_data = compat_urllib_parse.urlencode(login_form).encode('ascii')
119 req = compat_urllib_request.Request(self._LOGIN_URL, login_data)
120 login_results = self._download_webpage(
122 note='Logging in', errnote='unable to log in', fatal=False)
123 if login_results is False:
126 if re.search(r'id="errormsg_0_Passwd"', login_results) is not None:
127 raise ExtractorError('Please use your account password and a two-factor code instead of an application-specific password.', expected=True)
130 # TODO add SMS and phone call support - these require making a request and then prompting the user
132 if re.search(r'(?i)<form[^>]* id="challenge"', login_results) is not None:
133 tfa_code = self._get_tfa_info('2-step verification code')
136 self._downloader.report_warning(
137 'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
138 '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
141 tfa_code = remove_start(tfa_code, 'G-')
143 tfa_form_strs = self._form_hidden_inputs('challenge', login_results)
145 tfa_form_strs.update({
150 tfa_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k, v in tfa_form_strs.items())
151 tfa_data = compat_urllib_parse.urlencode(tfa_form).encode('ascii')
153 tfa_req = compat_urllib_request.Request(self._TWOFACTOR_URL, tfa_data)
154 tfa_results = self._download_webpage(
156 note='Submitting TFA code', errnote='unable to submit tfa', fatal=False)
158 if tfa_results is False:
161 if re.search(r'(?i)<form[^>]* id="challenge"', tfa_results) is not None:
162 self._downloader.report_warning('Two-factor code expired or invalid. Please try again, or use a one-use backup code instead.')
164 if re.search(r'(?i)<form[^>]* id="gaia_loginform"', tfa_results) is not None:
165 self._downloader.report_warning('unable to log in - did the page structure change?')
167 if re.search(r'smsauth-interstitial-reviewsettings', tfa_results) is not None:
168 self._downloader.report_warning('Your Google account has a security notice. Please log in on your web browser, resolve the notice, and try again.')
171 if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
172 self._downloader.report_warning('unable to log in: bad username or password')
176 def _real_initialize(self):
177 if self._downloader is None:
180 if not self._login():
184 class YoutubeIE(YoutubeBaseInfoExtractor):
185 IE_DESC = 'YouTube.com'
186 _VALID_URL = r"""(?x)^
188 (?:https?://|//) # http(s):// or protocol-independent URL
189 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|
190 (?:www\.)?deturl\.com/www\.youtube\.com/|
191 (?:www\.)?pwnyoutube\.com/|
192 (?:www\.)?yourepeat\.com/|
193 tube\.majestyc\.net/|
194 youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
195 (?:.*?\#/)? # handle anchor (#/) redirect urls
196 (?: # the various things that can precede the ID:
197 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
198 |(?: # or the v= param in all its forms
199 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
200 (?:\?|\#!?) # the params delimiter ? or # or #!
201 (?:.*?&)?? # any other preceding param (like /?s=tuff&v=xxxx)
205 |youtu\.be/ # just youtu.be/xxxx
206 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
208 )? # all until now is optional -> you can pass the naked ID
209 ([0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
210 (?!.*?&list=) # combined list/video URLs are handled by the playlist IE
211 (?(1).+)? # if we found the ID, everything can follow
213 _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
215 '5': {'ext': 'flv', 'width': 400, 'height': 240},
216 '6': {'ext': 'flv', 'width': 450, 'height': 270},
217 '13': {'ext': '3gp'},
218 '17': {'ext': '3gp', 'width': 176, 'height': 144},
219 '18': {'ext': 'mp4', 'width': 640, 'height': 360},
220 '22': {'ext': 'mp4', 'width': 1280, 'height': 720},
221 '34': {'ext': 'flv', 'width': 640, 'height': 360},
222 '35': {'ext': 'flv', 'width': 854, 'height': 480},
223 '36': {'ext': '3gp', 'width': 320, 'height': 240},
224 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080},
225 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072},
226 '43': {'ext': 'webm', 'width': 640, 'height': 360},
227 '44': {'ext': 'webm', 'width': 854, 'height': 480},
228 '45': {'ext': 'webm', 'width': 1280, 'height': 720},
229 '46': {'ext': 'webm', 'width': 1920, 'height': 1080},
230 '59': {'ext': 'mp4', 'width': 854, 'height': 480},
231 '78': {'ext': 'mp4', 'width': 854, 'height': 480},
235 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'preference': -20},
236 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'preference': -20},
237 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'preference': -20},
238 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'preference': -20},
239 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'preference': -20},
240 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'preference': -20},
241 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'preference': -20},
243 # Apple HTTP Live Streaming
244 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'preference': -10},
245 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'preference': -10},
246 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'preference': -10},
247 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'preference': -10},
248 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'preference': -10},
249 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'preference': -10},
250 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'preference': -10},
253 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
254 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
255 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
256 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
257 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
258 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, # Height can vary (https://github.com/rg3/youtube-dl/issues/4559)
259 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
260 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
261 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'h264'},
262 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'h264'},
263 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'vcodec': 'h264'},
266 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 48, 'preference': -50, 'container': 'm4a_dash'},
267 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 128, 'preference': -50, 'container': 'm4a_dash'},
268 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 256, 'preference': -50, 'container': 'm4a_dash'},
271 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
272 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
273 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
274 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
275 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
276 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
277 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'container': 'webm', 'vcodec': 'vp9'},
278 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
279 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
280 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
281 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
282 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
283 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
284 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
285 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
286 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
287 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'vp9'},
288 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'vp9'},
289 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'vp9'},
290 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'vcodec': 'vp9'},
291 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'vp9'},
294 '171': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 128, 'preference': -50},
295 '172': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 256, 'preference': -50},
297 # Dash webm audio with opus inside
298 '249': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50, 'preference': -50},
299 '250': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70, 'preference': -50},
300 '251': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160, 'preference': -50},
303 '_rtmp': {'protocol': 'rtmp'},
309 'url': 'http://www.youtube.com/watch?v=BaW_jenozKcj&t=1s&end=9',
313 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
314 'uploader': 'Philipp Hagemeister',
315 'uploader_id': 'phihag',
316 'upload_date': '20121002',
317 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
318 'categories': ['Science & Technology'],
319 'tags': ['youtube-dl'],
321 'dislike_count': int,
327 'url': 'http://www.youtube.com/watch?v=UxxajLWwzqY',
328 'note': 'Test generic use_cipher_signature video (#897)',
332 'upload_date': '20120506',
333 'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
334 'description': 'md5:782e8651347686cba06e58f71ab51773',
335 'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
336 'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
337 'iconic ep', 'iconic', 'love', 'it'],
338 'uploader': 'Icona Pop',
339 'uploader_id': 'IconaPop',
343 'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ',
344 'note': 'Test VEVO video with age protection (#956)',
348 'upload_date': '20130703',
349 'title': 'Justin Timberlake - Tunnel Vision (Explicit)',
350 'description': 'md5:64249768eec3bc4276236606ea996373',
351 'uploader': 'justintimberlakeVEVO',
352 'uploader_id': 'justintimberlakeVEVO',
357 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
358 'note': 'Embed-only video (#1746)',
362 'upload_date': '20120608',
363 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
364 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
365 'uploader': 'SET India',
366 'uploader_id': 'setindia'
370 'url': 'http://www.youtube.com/watch?v=BaW_jenozKcj&v=UxxajLWwzqY',
371 'note': 'Use the first video ID in the URL',
375 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
376 'uploader': 'Philipp Hagemeister',
377 'uploader_id': 'phihag',
378 'upload_date': '20121002',
379 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
380 'categories': ['Science & Technology'],
381 'tags': ['youtube-dl'],
383 'dislike_count': int,
386 'skip_download': True,
390 'url': 'http://www.youtube.com/watch?v=a9LDPn-MO4I',
391 'note': '256k DASH audio (format 141) via DASH manifest',
395 'upload_date': '20121002',
396 'uploader_id': '8KVIDEO',
398 'uploader': '8KVIDEO',
399 'title': 'UHDTV TEST 8K VIDEO.mp4'
402 'youtube_include_dash_manifest': True,
406 # DASH manifest with encrypted signature
408 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
412 'title': 'Afrojack, Spree Wilson - The Spark ft. Spree Wilson',
413 'description': 'md5:12e7067fa6735a77bdcbb58cb1187d2d',
414 'uploader': 'AfrojackVEVO',
415 'uploader_id': 'AfrojackVEVO',
416 'upload_date': '20131011',
419 'youtube_include_dash_manifest': True,
423 # JS player signature function name containing $
425 'url': 'https://www.youtube.com/watch?v=nfWlot6h_JM',
429 'title': 'Taylor Swift - Shake It Off',
430 'description': 'md5:95f66187cd7c8b2c13eb78e1223b63c3',
431 'uploader': 'TaylorSwiftVEVO',
432 'uploader_id': 'TaylorSwiftVEVO',
433 'upload_date': '20140818',
436 'youtube_include_dash_manifest': True,
442 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
446 'upload_date': '20100909',
447 'uploader': 'The Amazing Atheist',
448 'uploader_id': 'TheAmazingAtheist',
449 'title': 'Burning Everyone\'s Koran',
450 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
453 # Normal age-gate video (No vevo, embed allowed)
455 'url': 'http://youtube.com/watch?v=HtVdAasjOgU',
459 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
460 'description': 're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
461 'uploader': 'The Witcher',
462 'uploader_id': 'WitcherGame',
463 'upload_date': '20140605',
467 # Age-gate video with encrypted signature
469 'url': 'http://www.youtube.com/watch?v=6kLq3WMV1nU',
473 'title': 'Dedication To My Ex (Miss That) (Lyric Video)',
474 'description': 'md5:33765bb339e1b47e7e72b5490139bb41',
475 'uploader': 'LloydVEVO',
476 'uploader_id': 'LloydVEVO',
477 'upload_date': '20110629',
481 # video_info is None (https://github.com/rg3/youtube-dl/issues/4421)
483 'url': '__2ABJjxzNo',
487 'upload_date': '20100430',
488 'uploader_id': 'deadmau5',
489 'description': 'md5:12c56784b8032162bb936a5f76d55360',
490 'uploader': 'deadmau5',
491 'title': 'Deadmau5 - Some Chords (HD)',
493 'expected_warnings': [
494 'DASH manifest missing',
497 # Olympics (https://github.com/rg3/youtube-dl/issues/4431)
499 'url': 'lqQg6PlCWgI',
503 'upload_date': '20120724',
504 'uploader_id': 'olympic',
505 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
506 'uploader': 'Olympics',
507 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
510 'skip_download': 'requires avconv',
515 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
519 'stretched_ratio': 16 / 9.,
520 'upload_date': '20110310',
521 'uploader_id': 'AllenMeow',
522 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
524 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
527 # url_encoded_fmt_stream_map is empty string
529 'url': 'qEJwOuvDf7I',
533 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
535 'upload_date': '20150404',
536 'uploader_id': 'spbelect',
537 'uploader': 'Наблюдатели Петербурга',
540 'skip_download': 'requires avconv',
543 # Extraction from multiple DASH manifests (https://github.com/rg3/youtube-dl/pull/6097)
545 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
549 'title': 'md5:7b81415841e02ecd4313668cde88737a',
550 'description': 'md5:116377fd2963b81ec4ce64b542173306',
551 'upload_date': '20150625',
552 'uploader_id': 'dorappi2000',
553 'uploader': 'dorappi2000',
554 'formats': 'mincount:33',
557 # DASH manifest with segment_list
559 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
560 'md5': '8ce563a1d667b599d21064e982ab9e31',
564 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
565 'uploader': 'Airtek',
566 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
567 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
568 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
571 'youtube_include_dash_manifest': True,
572 'format': '135', # bestvideo
576 # Multifeed videos (multiple cameras), URL is for Main Camera
577 'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
580 'title': 'teamPGP: Rocket League Noob Stream',
581 'description': 'md5:dc7872fb300e143831327f1bae3af010',
587 'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
588 'description': 'md5:dc7872fb300e143831327f1bae3af010',
589 'upload_date': '20150721',
590 'uploader': 'Beer Games Beer',
591 'uploader_id': 'beergamesbeer',
597 'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
598 'description': 'md5:dc7872fb300e143831327f1bae3af010',
599 'upload_date': '20150721',
600 'uploader': 'Beer Games Beer',
601 'uploader_id': 'beergamesbeer',
607 'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
608 'description': 'md5:dc7872fb300e143831327f1bae3af010',
609 'upload_date': '20150721',
610 'uploader': 'Beer Games Beer',
611 'uploader_id': 'beergamesbeer',
617 'title': 'teamPGP: Rocket League Noob Stream (zim)',
618 'description': 'md5:dc7872fb300e143831327f1bae3af010',
619 'upload_date': '20150721',
620 'uploader': 'Beer Games Beer',
621 'uploader_id': 'beergamesbeer',
625 'skip_download': True,
def __init__(self, *args, **kwargs):
    """Initialize the extractor with an empty signature-function cache.

    All positional and keyword arguments are forwarded unchanged to the
    parent class initializer.
    """
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # Filled lazily by _decrypt_signature, which keys the entries by
    # (player URL, signature cache id).
    self._player_cache = dict()
def report_video_info_webpage_download(self, video_id):
    """Print a notice that the video info webpage of video_id is being downloaded."""
    message = '%s: Downloading video info webpage' % video_id
    self.to_screen(message)
def report_information_extraction(self, video_id):
    """Print a notice that video information for video_id is being extracted."""
    message = '%s: Extracting video information' % video_id
    self.to_screen(message)
def report_unavailable_format(self, video_id, format):
    """Print a notice that the requested format is not available for video_id."""
    details = (video_id, format)
    self.to_screen('%s: Format %s not available' % details)
def report_rtmp_download(self):
    """Print a notice that the download will use the RTMP protocol."""
    notice = 'RTMP download detected'
    self.to_screen(notice)
def _signature_cache_id(self, example_sig):
    """Return a string describing the shape of a signature.

    The signature is split on dots and the length of every part is
    joined with dots again, so a signature shaped like 'ab.cde'
    yields '2.3'.
    """
    part_lengths = [
        compat_str(len(chunk)) for chunk in example_sig.split('.')]
    return '.'.join(part_lengths)
654 def _extract_signature_function(self, video_id, player_url, example_sig):
656 r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player)?\.(?P<ext>[a-z]+)$',
659 raise ExtractorError('Cannot identify player %r' % player_url)
660 player_type = id_m.group('ext')
661 player_id = id_m.group('id')
663 # Read from filesystem cache
664 func_id = '%s_%s_%s' % (
665 player_type, player_id, self._signature_cache_id(example_sig))
666 assert os.path.basename(func_id) == func_id
668 cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
669 if cache_spec is not None:
670 return lambda s: ''.join(s[i] for i in cache_spec)
673 'Downloading player %s' % player_url
674 if self._downloader.params.get('verbose') else
675 'Downloading %s player %s' % (player_type, player_id)
677 if player_type == 'js':
678 code = self._download_webpage(
679 player_url, video_id,
681 errnote='Download of %s failed' % player_url)
682 res = self._parse_sig_js(code)
683 elif player_type == 'swf':
684 urlh = self._request_webpage(
685 player_url, video_id,
687 errnote='Download of %s failed' % player_url)
689 res = self._parse_sig_swf(code)
691 assert False, 'Invalid player type %r' % player_type
693 test_string = ''.join(map(compat_chr, range(len(example_sig))))
694 cache_res = res(test_string)
695 cache_spec = [ord(c) for c in cache_res]
697 self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
700 def _print_sig_code(self, func, example_sig):
701 def gen_sig_code(idxs):
702 def _genslice(start, end, step):
703 starts = '' if start == 0 else str(start)
704 ends = (':%d' % (end + step)) if end + step >= 0 else ':'
705 steps = '' if step == 1 else (':%d' % step)
706 return 's[%s%s%s]' % (starts, ends, steps)
709 # Squelch pyflakes warnings - start will be set when step is set
710 start = '(Never used)'
711 for i, prev in zip(idxs[1:], idxs[:-1]):
715 yield _genslice(start, prev, step)
718 if i - prev in [-1, 1]:
727 yield _genslice(start, i, step)
729 test_string = ''.join(map(compat_chr, range(len(example_sig))))
730 cache_res = func(test_string)
731 cache_spec = [ord(c) for c in cache_res]
732 expr_code = ' + '.join(gen_sig_code(cache_spec))
733 signature_id_tuple = '(%s)' % (
734 ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
735 code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
736 ' return %s\n') % (signature_id_tuple, expr_code)
737 self.to_screen('Extracted signature function:\n' + code)
def _parse_sig_js(self, jscode):
    """Build a callable from the signature function found in JS player code.

    The function name is the identifier that follows ``.sig||`` in
    jscode.  That function is extracted with JSInterpreter and wrapped
    so that it can be called with a single signature string.
    """
    sig_func_name = self._search_regex(
        r'\.sig\|\|([a-zA-Z0-9$]+)\(', jscode,
        'Initial JS player signature function name')
    js_function = JSInterpreter(jscode).extract_function(sig_func_name)

    def decipher(s):
        # The extracted function is invoked with a one-element argument list.
        return js_function([s])

    return decipher
def _parse_sig_swf(self, file_contents):
    """Build a callable from the signature function found in a SWF player.

    The 'decipher' function of the 'SignatureDecipher' class is
    extracted from file_contents with SWFInterpreter and wrapped so
    that it can be called with a single signature string.
    """
    interpreter = SWFInterpreter(file_contents)
    decipher_class = interpreter.extract_class('SignatureDecipher')
    swf_function = interpreter.extract_function(decipher_class, 'decipher')

    def decipher(s):
        # The extracted function is invoked with a one-element argument list.
        return swf_function([s])

    return decipher
755 def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
756 """Turn the encrypted s field into a working signature"""
758 if player_url is None:
759 raise ExtractorError('Cannot decrypt signature without player_url')
761 if player_url.startswith('//'):
762 player_url = 'https:' + player_url
764 player_id = (player_url, self._signature_cache_id(s))
765 if player_id not in self._player_cache:
766 func = self._extract_signature_function(
767 video_id, player_url, s
769 self._player_cache[player_id] = func
770 func = self._player_cache[player_id]
771 if self._downloader.params.get('youtube_print_sig_code'):
772 self._print_sig_code(func, s)
774 except Exception as e:
775 tb = traceback.format_exc()
776 raise ExtractorError(
777 'Signature extraction failed: ' + tb, cause=e)
779 def _get_subtitles(self, video_id, webpage):
781 subs_doc = self._download_xml(
782 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
783 video_id, note=False)
784 except ExtractorError as err:
785 self._downloader.report_warning('unable to download video subtitles: %s' % compat_str(err))
789 for track in subs_doc.findall('track'):
790 lang = track.attrib['lang_code']
791 if lang in sub_lang_list:
794 for ext in ['sbv', 'vtt', 'srt']:
795 params = compat_urllib_parse.urlencode({
799 'name': track.attrib['name'].encode('utf-8'),
802 'url': 'https://www.youtube.com/api/timedtext?' + params,
805 sub_lang_list[lang] = sub_formats
806 if not sub_lang_list:
807 self._downloader.report_warning('video doesn\'t have subtitles')
811 def _get_automatic_captions(self, video_id, webpage):
812 """We need the webpage for getting the captions url, pass it as an
813 argument to speed up the process."""
814 self.to_screen('%s: Looking for automatic captions' % video_id)
815 mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
816 err_msg = 'Couldn\'t find automatic captions for %s' % video_id
818 self._downloader.report_warning(err_msg)
820 player_config = json.loads(mobj.group(1))
822 args = player_config['args']
823 caption_url = args['ttsurl']
824 timestamp = args['timestamp']
825 # We get the available subtitles
826 list_params = compat_urllib_parse.urlencode({
831 list_url = caption_url + '&' + list_params
832 caption_list = self._download_xml(list_url, video_id)
833 original_lang_node = caption_list.find('track')
834 if original_lang_node is None:
835 self._downloader.report_warning('Video doesn\'t have automatic captions')
837 original_lang = original_lang_node.attrib['lang_code']
838 caption_kind = original_lang_node.attrib.get('kind', '')
841 for lang_node in caption_list.findall('target'):
842 sub_lang = lang_node.attrib['lang_code']
844 for ext in ['sbv', 'vtt', 'srt']:
845 params = compat_urllib_parse.urlencode({
846 'lang': original_lang,
850 'kind': caption_kind,
853 'url': caption_url + '&' + params,
856 sub_lang_list[sub_lang] = sub_formats
858 # An extractor error can be raised by the download process if there are
859 # no automatic captions but there are subtitles
860 except (KeyError, ExtractorError):
861 self._downloader.report_warning(err_msg)
865 def extract_id(cls, url):
866 mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
868 raise ExtractorError('Invalid URL: %s' % url)
869 video_id = mobj.group(2)
872 def _extract_from_m3u8(self, manifest_url, video_id):
875 def _get_urls(_manifest):
876 lines = _manifest.split('\n')
877 urls = filter(lambda l: l and not l.startswith('#'),
880 manifest = self._download_webpage(manifest_url, video_id, 'Downloading formats manifest')
881 formats_urls = _get_urls(manifest)
882 for format_url in formats_urls:
883 itag = self._search_regex(r'itag/(\d+?)/', format_url, 'itag')
884 url_map[itag] = format_url
def _extract_annotations(self, video_id):
    """Download the annotations document of video_id and return it.

    The result is whatever _download_webpage returns for the
    annotations_invideo URL of the video.
    """
    annotations_url = (
        'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s'
        % video_id)
    return self._download_webpage(
        annotations_url, video_id,
        note='Searching for annotations.',
        errnote='Unable to download video annotations.')
891 def _parse_dash_manifest(
892 self, video_id, dash_manifest_url, player_url, age_gate, fatal=True):
893 def decrypt_sig(mobj):
895 dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
896 return '/signature/%s' % dec_s
897 dash_manifest_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, dash_manifest_url)
898 dash_doc = self._download_xml(
899 dash_manifest_url, video_id,
900 note='Downloading DASH manifest',
901 errnote='Could not download DASH manifest',
904 if dash_doc is False:
908 for a in dash_doc.findall('.//{urn:mpeg:DASH:schema:MPD:2011}AdaptationSet'):
909 mime_type = a.attrib.get('mimeType')
910 for r in a.findall('{urn:mpeg:DASH:schema:MPD:2011}Representation'):
911 url_el = r.find('{urn:mpeg:DASH:schema:MPD:2011}BaseURL')
914 if mime_type == 'text/vtt':
915 # TODO implement WebVTT downloading
917 elif mime_type.startswith('audio/') or mime_type.startswith('video/'):
918 segment_list = r.find('{urn:mpeg:DASH:schema:MPD:2011}SegmentList')
919 format_id = r.attrib['id']
920 video_url = url_el.text
921 filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength'))
923 'format_id': format_id,
925 'width': int_or_none(r.attrib.get('width')),
926 'height': int_or_none(r.attrib.get('height')),
927 'tbr': int_or_none(r.attrib.get('bandwidth'), 1000),
928 'asr': int_or_none(r.attrib.get('audioSamplingRate')),
929 'filesize': filesize,
930 'fps': int_or_none(r.attrib.get('frameRate')),
932 if segment_list is not None:
934 'initialization_url': segment_list.find('{urn:mpeg:DASH:schema:MPD:2011}Initialization').attrib['sourceURL'],
935 'segment_urls': [segment.attrib.get('media') for segment in segment_list.findall('{urn:mpeg:DASH:schema:MPD:2011}SegmentURL')],
936 'protocol': 'http_dash_segments',
939 existing_format = next(
941 if fo['format_id'] == format_id)
942 except StopIteration:
943 full_info = self._formats.get(format_id, {}).copy()
945 codecs = r.attrib.get('codecs')
947 if full_info.get('acodec') == 'none' and 'vcodec' not in full_info:
948 full_info['vcodec'] = codecs
949 elif full_info.get('vcodec') == 'none' and 'acodec' not in full_info:
950 full_info['acodec'] = codecs
951 formats.append(full_info)
953 existing_format.update(f)
955 self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
958 def _real_extract(self, url):
959 url, smuggled_data = unsmuggle_url(url, {})
962 'http' if self._downloader.params.get('prefer_insecure', False)
967 parsed_url = compat_urllib_parse_urlparse(url)
968 for component in [parsed_url.fragment, parsed_url.query]:
969 query = compat_parse_qs(component)
970 if start_time is None and 't' in query:
971 start_time = parse_duration(query['t'][0])
972 if start_time is None and 'start' in query:
973 start_time = parse_duration(query['start'][0])
974 if end_time is None and 'end' in query:
975 end_time = parse_duration(query['end'][0])
977 # Extract original video URL from URL with redirection, like age verification, using next_url parameter
978 mobj = re.search(self._NEXT_URL_RE, url)
980 url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
981 video_id = self.extract_id(url)
984 url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
985 video_webpage = self._download_webpage(url, video_id)
987 # Attempt to extract SWF player URL
988 mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
990 player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
996 def add_dash_mpd(video_info):
997 dash_mpd = video_info.get('dashmpd')
998 if dash_mpd and dash_mpd[0] not in dash_mpds:
999 dash_mpds.append(dash_mpd[0])
1002 embed_webpage = None
1004 if re.search(r'player-age-gate-content">', video_webpage) is not None:
1006 # We simulate the access to the video from www.youtube.com/v/{video_id}
1007 # this can be viewed without login into Youtube
1008 url = proto + '://www.youtube.com/embed/%s' % video_id
1009 embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
1010 data = compat_urllib_parse.urlencode({
1011 'video_id': video_id,
1012 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1013 'sts': self._search_regex(
1014 r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
1016 video_info_url = proto + '://www.youtube.com/get_video_info?' + data
1017 video_info_webpage = self._download_webpage(
1018 video_info_url, video_id,
1019 note='Refetching age-gated info webpage',
1020 errnote='unable to download video info webpage')
1021 video_info = compat_parse_qs(video_info_webpage)
1022 add_dash_mpd(video_info)
1026 # Try looking directly into the video webpage
1027 mobj = re.search(r';ytplayer\.config\s*=\s*({.*?});', video_webpage)
1029 json_code = uppercase_escape(mobj.group(1))
1030 ytplayer_config = json.loads(json_code)
1031 args = ytplayer_config['args']
1032 if args.get('url_encoded_fmt_stream_map'):
1033 # Convert to the same format returned by compat_parse_qs
1034 video_info = dict((k, [v]) for k, v in args.items())
1035 add_dash_mpd(video_info)
1036 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1038 if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
1039 # We also try looking in get_video_info since it may contain different dashmpd
1040 # URL that points to a DASH manifest with possibly different itag set (some itags
1041 # are missing from DASH manifest pointed by webpage's dashmpd, some - from DASH
1042 # manifest pointed by get_video_info's dashmpd).
1043 # The general idea is to take a union of itags of both DASH manifests (for example
1044 # video with such 'manifest behavior' see https://github.com/rg3/youtube-dl/issues/6093)
1045 self.report_video_info_webpage_download(video_id)
1046 for el_type in ['&el=info', '&el=embedded', '&el=detailpage', '&el=vevo', '']:
1048 '%s://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
1049 % (proto, video_id, el_type))
1050 video_info_webpage = self._download_webpage(
1052 video_id, note=False,
1053 errnote='unable to download video info webpage')
1054 get_video_info = compat_parse_qs(video_info_webpage)
1055 if get_video_info.get('use_cipher_signature') != ['True']:
1056 add_dash_mpd(get_video_info)
1058 video_info = get_video_info
1059 if 'token' in get_video_info:
1061 if 'token' not in video_info:
1062 if 'reason' in video_info:
1063 if 'The uploader has not made this video available in your country.' in video_info['reason']:
1064 regions_allowed = self._html_search_meta('regionsAllowed', video_webpage, default=None)
1066 raise ExtractorError('YouTube said: This video is available in %s only' % (
1067 ', '.join(map(ISO3166Utils.short2full, regions_allowed.split(',')))),
1069 raise ExtractorError(
1070 'YouTube said: %s' % video_info['reason'][0],
1071 expected=True, video_id=video_id)
1073 raise ExtractorError(
1074 '"token" parameter not in video info for unknown reason',
1078 if 'title' in video_info:
1079 video_title = video_info['title'][0]
1081 self._downloader.report_warning('Unable to extract video title')
1085 video_description = get_element_by_id("eow-description", video_webpage)
1086 if video_description:
1087 video_description = re.sub(r'''(?x)
1089 (?:[a-zA-Z-]+="[^"]+"\s+)*?
1091 (?:[a-zA-Z-]+="[^"]+"\s+)*?
1092 class="yt-uix-redirect-link"\s*>
1095 ''', r'\1', video_description)
1096 video_description = clean_html(video_description)
1098 fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
1100 video_description = unescapeHTML(fd_mobj.group(1))
1102 video_description = ''
1104 if 'multifeed_metadata_list' in video_info and not smuggled_data.get('force_singlefeed', False):
1105 if not self._downloader.params.get('noplaylist'):
1108 multifeed_metadata_list = compat_urllib_parse_unquote_plus(video_info['multifeed_metadata_list'][0])
1109 for feed in multifeed_metadata_list.split(','):
1110 feed_data = compat_parse_qs(feed)
1112 '_type': 'url_transparent',
1113 'ie_key': 'Youtube',
1115 '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
1116 {'force_singlefeed': True}),
1117 'title': '%s (%s)' % (video_title, feed_data['title'][0]),
1119 feed_ids.append(feed_data['id'][0])
1121 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
1122 % (', '.join(feed_ids), video_id))
1123 return self.playlist_result(entries, video_id, video_title, video_description)
1124 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
1126 if 'view_count' in video_info:
1127 view_count = int(video_info['view_count'][0])
1131 # Check for "rental" videos
1132 if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
1133 raise ExtractorError('"rental" videos not supported')
1135 # Start extracting information
1136 self.report_information_extraction(video_id)
1139 if 'author' not in video_info:
1140 raise ExtractorError('Unable to extract uploader name')
1141 video_uploader = compat_urllib_parse_unquote_plus(video_info['author'][0])
1144 video_uploader_id = None
1145 mobj = re.search(r'<link itemprop="url" href="http://www.youtube.com/(?:user|channel)/([^"]+)">', video_webpage)
1146 if mobj is not None:
1147 video_uploader_id = mobj.group(1)
1149 self._downloader.report_warning('unable to extract uploader nickname')
1152 # We try first to get a high quality image:
1153 m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
1154 video_webpage, re.DOTALL)
1155 if m_thumb is not None:
1156 video_thumbnail = m_thumb.group(1)
1157 elif 'thumbnail_url' not in video_info:
1158 self._downloader.report_warning('unable to extract video thumbnail')
1159 video_thumbnail = None
1160 else: # don't panic if we can't find it
1161 video_thumbnail = compat_urllib_parse_unquote_plus(video_info['thumbnail_url'][0])
1164 upload_date = self._html_search_meta(
1165 'datePublished', video_webpage, 'upload date', default=None)
1167 upload_date = self._search_regex(
1168 [r'(?s)id="eow-date.*?>(.*?)</span>',
1169 r'id="watch-uploader-info".*?>.*?(?:Published|Uploaded|Streamed live|Started) on (.+?)</strong>'],
1170 video_webpage, 'upload date', default=None)
1172 upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
1173 upload_date = unified_strdate(upload_date)
1175 m_cat_container = self._search_regex(
1176 r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
1177 video_webpage, 'categories', default=None)
1179 category = self._html_search_regex(
1180 r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
1182 video_categories = None if category is None else [category]
1184 video_categories = None
1187 unescapeHTML(m.group('content'))
1188 for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
1190 def _extract_count(count_name):
1191 return str_to_int(self._search_regex(
1192 r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>'
1193 % re.escape(count_name),
1194 video_webpage, count_name, default=None))
1196 like_count = _extract_count('like')
1197 dislike_count = _extract_count('dislike')
1200 video_subtitles = self.extract_subtitles(video_id, video_webpage)
1201 automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
1203 if 'length_seconds' not in video_info:
1204 self._downloader.report_warning('unable to extract video duration')
1205 video_duration = None
1207 video_duration = int(compat_urllib_parse_unquote_plus(video_info['length_seconds'][0]))
1210 video_annotations = None
1211 if self._downloader.params.get('writeannotations', False):
1212 video_annotations = self._extract_annotations(video_id)
1214 def _map_to_format_list(urlmap):
1216 for itag, video_real_url in urlmap.items():
1219 'url': video_real_url,
1220 'player_url': player_url,
1222 if itag in self._formats:
1223 dct.update(self._formats[itag])
1227 if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1228 self.report_rtmp_download()
1230 'format_id': '_rtmp',
1232 'url': video_info['conn'][0],
1233 'player_url': player_url,
1235 elif len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1:
1236 encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
1237 if 'rtmpe%3Dyes' in encoded_url_map:
1238 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
1240 for url_data_str in encoded_url_map.split(','):
1241 url_data = compat_parse_qs(url_data_str)
1242 if 'itag' not in url_data or 'url' not in url_data:
1244 format_id = url_data['itag'][0]
1245 url = url_data['url'][0]
1247 if 'sig' in url_data:
1248 url += '&signature=' + url_data['sig'][0]
1249 elif 's' in url_data:
1250 encrypted_sig = url_data['s'][0]
1251 ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
1253 jsplayer_url_json = self._search_regex(
1255 embed_webpage if age_gate else video_webpage,
1256 'JS player URL (1)', default=None)
1257 if not jsplayer_url_json and not age_gate:
1258 # We need the embed website after all
1259 if embed_webpage is None:
1260 embed_url = proto + '://www.youtube.com/embed/%s' % video_id
1261 embed_webpage = self._download_webpage(
1262 embed_url, video_id, 'Downloading embed webpage')
1263 jsplayer_url_json = self._search_regex(
1264 ASSETS_RE, embed_webpage, 'JS player URL')
1266 player_url = json.loads(jsplayer_url_json)
1267 if player_url is None:
1268 player_url_json = self._search_regex(
1269 r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
1270 video_webpage, 'age gate player URL')
1271 player_url = json.loads(player_url_json)
1273 if self._downloader.params.get('verbose'):
1274 if player_url is None:
1275 player_version = 'unknown'
1276 player_desc = 'unknown'
1278 if player_url.endswith('swf'):
1279 player_version = self._search_regex(
1280 r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
1281 'flash player', fatal=False)
1282 player_desc = 'flash player %s' % player_version
1284 player_version = self._search_regex(
1285 r'html5player-([^/]+?)(?:/html5player)?\.js',
1287 'html5 player', fatal=False)
1288 player_desc = 'html5 player %s' % player_version
1290 parts_sizes = self._signature_cache_id(encrypted_sig)
1291 self.to_screen('{%s} signature length %s, %s' %
1292 (format_id, parts_sizes, player_desc))
1294 signature = self._decrypt_signature(
1295 encrypted_sig, video_id, player_url, age_gate)
1296 url += '&signature=' + signature
1297 if 'ratebypass' not in url:
1298 url += '&ratebypass=yes'
1299 url_map[format_id] = url
1300 formats = _map_to_format_list(url_map)
1301 elif video_info.get('hlsvp'):
1302 manifest_url = video_info['hlsvp'][0]
1303 url_map = self._extract_from_m3u8(manifest_url, video_id)
1304 formats = _map_to_format_list(url_map)
1306 raise ExtractorError('no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
1308 # Look for the DASH manifest
1309 if self._downloader.params.get('youtube_include_dash_manifest', True):
1310 dash_mpd_fatal = True
1311 for dash_manifest_url in dash_mpds:
1314 for df in self._parse_dash_manifest(
1315 video_id, dash_manifest_url, player_url, age_gate, dash_mpd_fatal):
1316 # Do not overwrite DASH format found in some previous DASH manifest
1317 if df['format_id'] not in dash_formats:
1318 dash_formats[df['format_id']] = df
1319 # Additional DASH manifests may end up in HTTP Error 403 therefore
1320 # allow them to fail without bug report message if we already have
1321 # some DASH manifest succeeded. This is temporary workaround to reduce
1322 # burst of bug reports until we figure out the reason and whether it
1323 # can be fixed at all.
1324 dash_mpd_fatal = False
1325 except (ExtractorError, KeyError) as e:
1326 self.report_warning(
1327 'Skipping DASH manifest: %r' % e, video_id)
1329 # Remove the formats we found through non-DASH, they
1330 # contain less info and it can be wrong, because we use
1331 # fixed values (for example the resolution). See
1332 # https://github.com/rg3/youtube-dl/issues/5774 for an
1334 formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
1335 formats.extend(dash_formats.values())
1337 # Check for malformed aspect ratio
1338 stretched_m = re.search(
1339 r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
1342 ratio = float(stretched_m.group('w')) / float(stretched_m.group('h'))
1344 if f.get('vcodec') != 'none':
1345 f['stretched_ratio'] = ratio
1347 self._sort_formats(formats)
1351 'uploader': video_uploader,
1352 'uploader_id': video_uploader_id,
1353 'upload_date': upload_date,
1354 'title': video_title,
1355 'thumbnail': video_thumbnail,
1356 'description': video_description,
1357 'categories': video_categories,
1359 'subtitles': video_subtitles,
1360 'automatic_captions': automatic_captions,
1361 'duration': video_duration,
1362 'age_limit': 18 if age_gate else 0,
1363 'annotations': video_annotations,
1364 'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
1365 'view_count': view_count,
1366 'like_count': like_count,
1367 'dislike_count': dislike_count,
1368 'average_rating': float_or_none(video_info.get('avg_rating', [None])[0]),
1371 'start_time': start_time,
1372 'end_time': end_time,
1376 class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
1377 IE_DESC = 'YouTube.com playlists'
1378 _VALID_URL = r"""(?x)(?:
1383 (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/videoseries)
1384 \? (?:.*?&)*? (?:p|a|list)=
1388 (?:PL|LL|EC|UU|FL|RD|UL)?[0-9A-Za-z-_]{10,}
1389 # Top tracks, they can also include dots
1394 ((?:PL|LL|EC|UU|FL|RD|UL)[0-9A-Za-z-_]{10,})
1396 _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
1397 _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&[^"]*?index=(?P<index>\d+)'
1398 IE_NAME = 'youtube:playlist'
1400 'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
1402 'title': 'ytdl test PL',
1403 'id': 'PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
1405 'playlist_count': 3,
1407 'url': 'https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
1409 'id': 'PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
1410 'title': 'YDL_Empty_List',
1412 'playlist_count': 0,
1414 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
1415 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
1417 'title': '29C3: Not my department',
1418 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
1420 'playlist_count': 95,
1422 'note': 'issue #673',
1423 'url': 'PLBB231211A4F62143',
1425 'title': '[OLD]Team Fortress 2 (Class-based LP)',
1426 'id': 'PLBB231211A4F62143',
1428 'playlist_mincount': 26,
1430 'note': 'Large playlist',
1431 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
1433 'title': 'Uploads from Cauchemar',
1434 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
1436 'playlist_mincount': 799,
1438 'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
1440 'title': 'YDL_safe_search',
1441 'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
1443 'playlist_count': 2,
1446 'url': 'http://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
1447 'playlist_count': 4,
1450 'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
1453 'note': 'Embedded SWF player',
1454 'url': 'http://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
1455 'playlist_count': 4,
1458 'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ',
1461 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
1462 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
1464 'title': 'Uploads from Interstellar Movie',
1465 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
1467 'playlist_mincout': 21,
1470 def _real_initialize(self):
def _extract_mix(self, playlist_id):
    """Extract an auto-generated mix as a playlist.

    Mixes are generated from a single video: the last 11 characters of
    ``playlist_id`` are used as that video's id.  The watch page for that
    video/list pair is downloaded and scraped for the mix title and for
    the ids of the videos linked as part of the list.

    Returns the result of ``self.playlist_result`` for the found videos.
    """
    seed_video_id = playlist_id[-11:]
    mix_url = 'https://youtube.com/watch?v=%s&list=%s' % (seed_video_id, playlist_id)
    webpage = self._download_webpage(
        mix_url, playlist_id, 'Downloading Youtube mix')

    # Probe the candidate title containers in order and stop at the first
    # one that yields something; if none does, the last lookup's result is
    # what gets passed on.
    title_span = None
    for class_name in ('playlist-title', 'title long-title', 'title'):
        title_span = get_element_by_attribute('class', class_name, webpage)
        if title_span:
            break
    title = clean_html(title_span)

    # Only watch links that carry this very list id are collected.
    link_re = r'''(?xs)data-video-username=".*?".*?
               href="/watch\?v=([0-9A-Za-z_-]{11})&[^"]*?list=%s''' % re.escape(playlist_id)
    video_ids = orderedSet(re.findall(link_re, webpage))

    return self.playlist_result(
        self._ids_to_results(video_ids), playlist_id, title)
1493 def _extract_playlist(self, playlist_id):
1494 url = self._TEMPLATE_URL % playlist_id
1495 page = self._download_webpage(url, playlist_id)
1497 for match in re.findall(r'<div class="yt-alert-message">([^<]+)</div>', page):
1498 match = match.strip()
1499 # Check if the playlist exists or is private
1500 if re.match(r'[^<]*(The|This) playlist (does not exist|is private)[^<]*', match):
1501 raise ExtractorError(
1502 'The playlist doesn\'t exist or is private, use --username or '
1503 '--netrc to access it.',
1505 elif re.match(r'[^<]*Invalid parameters[^<]*', match):
1506 raise ExtractorError(
1507 'Invalid parameters. Maybe URL is incorrect.',
1509 elif re.match(r'[^<]*Choose your language[^<]*', match):
1512 self.report_warning('Youtube gives an alert message: ' + match)
1514 # Extract the video ids from the playlist pages
1516 more_widget_html = content_html = page
1517 for page_num in itertools.count(1):
1518 matches = re.finditer(self._VIDEO_RE, content_html)
1519 # We remove the duplicates and the link with index 0
1520 # (it's not the first video of the playlist)
1521 new_ids = orderedSet(m.group('id') for m in matches if m.group('index') != '0')
1522 for vid_id in new_ids:
1523 yield self.url_result(vid_id, 'Youtube', video_id=vid_id)
1525 mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
1529 more = self._download_json(
1530 'https://youtube.com/%s' % mobj.group('more'), playlist_id,
1531 'Downloading page #%s' % page_num,
1532 transform_source=uppercase_escape)
1533 content_html = more['content_html']
1534 if not content_html.strip():
1535 # Some webpages show a "Load more" button but they don't
1538 more_widget_html = more['load_more_widget_html']
1540 playlist_title = self._html_search_regex(
1541 r'(?s)<h1 class="pl-header-title[^"]*">\s*(.*?)\s*</h1>',
1544 return self.playlist_result(_entries(), playlist_id, playlist_title)
def _real_extract(self, url):
    """Route a playlist URL to the matching extraction routine.

    The playlist id is taken from whichever of the two capture groups of
    ``_VALID_URL`` matched.  When the URL also carries a ``v`` query
    parameter and the ``noplaylist`` option is set, only that video is
    returned.  Ids starting with ``RD`` or ``UL`` go through
    ``_extract_mix``; everything else through ``_extract_playlist``.

    Raises ExtractorError when ``url`` does not match ``_VALID_URL``.
    """
    mobj = re.match(self._VALID_URL, url)
    if mobj is None:
        raise ExtractorError('Invalid URL: %s' % url)
    playlist_id = mobj.group(1) or mobj.group(2)

    # A watch URL with a list parameter names both a video and a playlist;
    # --no-playlist decides which of the two the user gets.
    raw_query = compat_urlparse.urlparse(url).query
    query_dict = compat_urlparse.parse_qs(raw_query)
    if 'v' in query_dict:
        video_id = query_dict['v'][0]
        if self._downloader.params.get('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
            return self.url_result(video_id, 'Youtube', video_id=video_id)
        self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))

    # Mixes require a custom extraction process
    is_mix = playlist_id.startswith(('RD', 'UL'))
    extract = self._extract_mix if is_mix else self._extract_playlist
    return extract(playlist_id)
1570 class YoutubeChannelIE(InfoExtractor):
1571 IE_DESC = 'YouTube.com channels'
1572 _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/(?P<id>[0-9A-Za-z_-]+)'
1573 _TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos'
1574 IE_NAME = 'youtube:channel'
1576 'note': 'paginated channel',
1577 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
1578 'playlist_mincount': 91,
1580 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
def extract_videos_from_page(page):
    """Collect the videos linked from a chunk of channel HTML.

    Scans ``page`` for ``/watch?v=`` links, optionally preceded by a
    ``title="..."`` attribute inside the same tag, and returns
    ``zip(ids, titles)``: one ``(video_id, video_title)`` pair per distinct
    video id, in order of first appearance.  When an id occurs several
    times, the first non-empty title seen for it is kept.

    NOTE(review): there is no ``self`` parameter although callers invoke
    this as ``self.extract_videos_from_page(...)``, so a ``@staticmethod``
    decorator is expected directly above this definition - confirm.
    """
    ids_in_page = []
    # id -> title; gives constant-time duplicate detection instead of a
    # linear list.index() scan for every single match.
    titles_by_id = {}
    for mobj in re.finditer(r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?', page):
        video_id = mobj.group('id')
        video_title = unescapeHTML(mobj.group('title'))
        if video_id not in titles_by_id:
            ids_in_page.append(video_id)
            titles_by_id[video_id] = video_title
        elif video_title and not titles_by_id[video_id]:
            # A later link to an already known video supplies the title
            # that the first occurrence lacked.
            titles_by_id[video_id] = video_title
    titles_in_page = [titles_by_id[video_id] for video_id in ids_in_page]
    return zip(ids_in_page, titles_in_page)
1600 def _real_extract(self, url):
1601 channel_id = self._match_id(url)
1603 url = self._TEMPLATE_URL % channel_id
1605 # Channel by page listing is restricted to 35 pages of 30 items, i.e. 1050 videos total (see #5778)
1606 # Workaround by extracting as a playlist if managed to obtain channel playlist URL
1607 # otherwise fallback on channel by page extraction
1608 channel_page = self._download_webpage(
1609 url + '?view=57', channel_id,
1610 'Downloading channel page', fatal=False)
1611 channel_playlist_id = self._html_search_meta(
1612 'channelId', channel_page, 'channel id', default=None)
1613 if not channel_playlist_id:
1614 channel_playlist_id = self._search_regex(
1615 r'data-channel-external-id="([^"]+)"',
1616 channel_page, 'channel id', default=None)
1617 if channel_playlist_id and channel_playlist_id.startswith('UC'):
1618 playlist_id = 'UU' + channel_playlist_id[2:]
1619 return self.url_result(
1620 compat_urlparse.urljoin(url, '/playlist?list=%s' % playlist_id), 'YoutubePlaylist')
1622 channel_page = self._download_webpage(url, channel_id, 'Downloading page #1')
1623 autogenerated = re.search(r'''(?x)
1625 channel-header-autogenerated-label|
1626 yt-channel-title-autogenerated
1627 )[^"]*"''', channel_page) is not None
1630 # The videos are contained in a single page
1631 # the ajax pages can't be used, they are empty
1634 video_id, 'Youtube', video_id=video_id,
1635 video_title=video_title)
1636 for video_id, video_title in self.extract_videos_from_page(channel_page)]
1637 return self.playlist_result(entries, channel_id)
1640 more_widget_html = content_html = channel_page
1641 for pagenum in itertools.count(1):
1643 for video_id, video_title in self.extract_videos_from_page(content_html):
1644 yield self.url_result(
1645 video_id, 'Youtube', video_id=video_id,
1646 video_title=video_title)
1649 r'data-uix-load-more-href="/?(?P<more>[^"]+)"',
1654 more = self._download_json(
1655 'https://youtube.com/%s' % mobj.group('more'), channel_id,
1656 'Downloading page #%s' % (pagenum + 1),
1657 transform_source=uppercase_escape)
1658 content_html = more['content_html']
1659 more_widget_html = more['load_more_widget_html']
1661 return self.playlist_result(_entries(), channel_id)
1664 class YoutubeUserIE(YoutubeChannelIE):
1665 IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
1666 _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
1667 _TEMPLATE_URL = 'https://www.youtube.com/user/%s/videos'
1668 IE_NAME = 'youtube:user'
1671 'url': 'https://www.youtube.com/user/TheLinuxFoundation',
1672 'playlist_mincount': 320,
1674 'title': 'TheLinuxFoundation',
1677 'url': 'ytuser:phihag',
1678 'only_matching': True,
def suitable(cls, url):
    """Tell whether this extractor should handle ``url``.

    The user URL pattern is too permissive and would also match URLs that
    belong to other extractors of this module, so every other module-level
    object whose name ends in ``IE`` is asked first; if any of them accepts
    the URL this extractor declines.  Otherwise the decision is left to the
    parent class.

    NOTE(review): the ``cls`` parameter and ``super(YoutubeUserIE, cls)``
    imply a ``@classmethod`` decorator directly above this definition -
    confirm.
    """
    for name, klass in globals().items():
        if not name.endswith('IE') or klass is cls:
            continue
        if klass.suitable(url):
            # Another, more specific extractor claims this URL.
            return False
    return super(YoutubeUserIE, cls).suitable(url)
1692 class YoutubeSearchIE(SearchInfoExtractor, YoutubePlaylistIE):
1693 IE_DESC = 'YouTube.com searches'
1694 # there doesn't appear to be a real limit, for example if you search for
1695 # 'python' you get more than 8.000.000 results
1696 _MAX_RESULTS = float('inf')
1697 IE_NAME = 'youtube:search'
1698 _SEARCH_KEY = 'ytsearch'
1699 _EXTRA_QUERY_ARGS = {}
1702 def _get_n_results(self, query, n):
1703 """Get a specified number of results for a query"""
1708 for pagenum in itertools.count(1):
1710 'search_query': query.encode('utf-8'),
1714 url_query.update(self._EXTRA_QUERY_ARGS)
1715 result_url = 'https://www.youtube.com/results?' + compat_urllib_parse.urlencode(url_query)
1716 data = self._download_json(
1717 result_url, video_id='query "%s"' % query,
1718 note='Downloading page %s' % pagenum,
1719 errnote='Unable to download API page')
1720 html_content = data[1]['body']['content']
1722 if 'class="search-message' in html_content:
1723 raise ExtractorError(
1724 '[youtube] No video results', expected=True)
1726 new_videos = self._ids_to_results(orderedSet(re.findall(
1727 r'href="/watch\?v=(.{11})', html_content)))
1728 videos += new_videos
1729 if not new_videos or len(videos) > limit:
1734 return self.playlist_result(videos, query)
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Variant of the search extractor that adds a ``search_sort`` argument."""
    IE_DESC = 'YouTube.com searches, newest videos first'
    # Derived from the parent's name so both stay in sync.
    IE_NAME = '%s:date' % YoutubeSearchIE.IE_NAME
    _SEARCH_KEY = 'ytsearchdate'
    # Extra query arguments; this is the only behavioural difference from
    # the parent class.
    _EXTRA_QUERY_ARGS = {'search_sort': 'video_date_uploaded'}
1744 class YoutubeSearchURLIE(InfoExtractor):
1745 IE_DESC = 'YouTube.com search URLs'
1746 IE_NAME = 'youtube:search_url'
1747 _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?search_query=(?P<query>[^&]+)(?:[&]|$)'
1749 'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
1750 'playlist_mincount': 5,
1752 'title': 'youtube-dl test video',
1756 def _real_extract(self, url):
1757 mobj = re.match(self._VALID_URL, url)
1758 query = compat_urllib_parse_unquote_plus(mobj.group('query'))
1760 webpage = self._download_webpage(url, query)
1761 result_code = self._search_regex(
1762 r'(?s)<ol[^>]+class="item-section"(.*?)</ol>', webpage, 'result HTML')
1764 part_codes = re.findall(
1765 r'(?s)<h3 class="yt-lockup-title">(.*?)</h3>', result_code)
1767 for part_code in part_codes:
1768 part_title = self._html_search_regex(
1769 [r'(?s)title="([^"]+)"', r'>([^<]+)</a>'], part_code, 'item title', fatal=False)
1770 part_url_snippet = self._html_search_regex(
1771 r'(?s)href="([^"]+)"', part_code, 'item URL')
1772 part_url = compat_urlparse.urljoin(
1773 'https://www.youtube.com/', part_url_snippet)
1777 'title': part_title,
1781 '_type': 'playlist',
1787 class YoutubeShowIE(InfoExtractor):
1788 IE_DESC = 'YouTube.com (multi-season) shows'
1789 _VALID_URL = r'https?://www\.youtube\.com/show/(?P<id>[^?#]*)'
1790 IE_NAME = 'youtube:show'
1792 'url': 'http://www.youtube.com/show/airdisasters',
1793 'playlist_mincount': 3,
1795 'id': 'airdisasters',
1796 'title': 'Air Disasters',
1800 def _real_extract(self, url):
1801 mobj = re.match(self._VALID_URL, url)
1802 playlist_id = mobj.group('id')
1803 webpage = self._download_webpage(
1804 url, playlist_id, 'Downloading show webpage')
1805 # There's one playlist for each season of the show
1806 m_seasons = list(re.finditer(r'href="(/playlist\?list=.*?)"', webpage))
1807 self.to_screen('%s: Found %s seasons' % (playlist_id, len(m_seasons)))
1810 'https://www.youtube.com' + season.group(1), 'YoutubePlaylist')
1811 for season in m_seasons
1813 title = self._og_search_title(webpage, fatal=False)
1816 '_type': 'playlist',
1823 class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
1825 Base class for feed extractors
1826 Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
1828 _LOGIN_REQUIRED = True
1832 return 'youtube:%s' % self._FEED_NAME
1834 def _real_initialize(self):
1837 def _real_extract(self, url):
1838 page = self._download_webpage(
1839 'https://www.youtube.com/feed/%s' % self._FEED_NAME, self._PLAYLIST_TITLE)
1841 # The extraction process is the same as for playlists, but the regex
1842 # for the video ids doesn't contain an index
1844 more_widget_html = content_html = page
1845 for page_num in itertools.count(1):
1846 matches = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)
1848 # 'recommended' feed has infinite 'load more' and each new portion spins
1849 # the same videos in (sometimes) slightly different order, so we'll check
1850 # for unicity and break when portion has no new videos
1851 new_ids = filter(lambda video_id: video_id not in ids, orderedSet(matches))
1857 mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
1861 more = self._download_json(
1862 'https://youtube.com/%s' % mobj.group('more'), self._PLAYLIST_TITLE,
1863 'Downloading page #%s' % page_num,
1864 transform_source=uppercase_escape)
1865 content_html = more['content_html']
1866 more_widget_html = more['load_more_widget_html']
1868 return self.playlist_result(
1869 self._ids_to_results(ids), playlist_title=self._PLAYLIST_TITLE)
class YoutubeWatchLaterIE(YoutubePlaylistIE):
    """Extractor for the watch later list, handled as the playlist ``WL``."""
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    IE_NAME = 'youtube:watchlater'
    # Matches the feed URL, the playlist URL with list=WL, or the
    # ":ytwatchlater" shortcut.
    _VALID_URL = r'https?://www\.youtube\.com/(?:feed/watch_later|playlist\?list=WL)|:ytwatchlater'

    # Empty on purpose: the tests inherited from the playlist extractor do
    # not apply to this class.
    _TESTS = []

    def _real_extract(self, url):
        """Extract the fixed ``WL`` playlist; ``url`` itself is not inspected."""
        watch_later_id = 'WL'
        return self._extract_playlist(watch_later_id)
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Extractor that resolves the favourites page to its playlist."""
    IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
    IE_NAME = 'youtube:favorites'
    # Either the my_favorites URL or one of the shortcuts ":ytfav",
    # ":ytfavorites" / ":ytfavourites".
    _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
    _LOGIN_REQUIRED = True

    def _real_extract(self, url):
        """Return a URL result pointing at the favourites playlist.

        The my_favorites page is always downloaded, whatever form ``url``
        had.  The first ``list=`` value found in it is handed to the
        ``YoutubePlaylist`` extractor.
        """
        favourites_page = self._download_webpage(
            'https://www.youtube.com/my_favorites',
            'Youtube Favourites videos')
        favourites_list_id = self._search_regex(
            r'list=(.+?)["&]', favourites_page, 'favourites playlist id')
        return self.url_result(favourites_list_id, 'YoutubePlaylist')
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the ``recommended`` feed."""
    # Consumed by the feed base class: _FEED_NAME is interpolated into the
    # extractor name and the https://www.youtube.com/feed/<name> URL,
    # _PLAYLIST_TITLE becomes the title of the resulting playlist.
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = 'Youtube Recommended videos'
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    # Feed URL, or the ":ytrec" / ":ytrecommended" shortcut.
    _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?'
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the ``subscriptions`` feed."""
    # Consumed by the feed base class: _FEED_NAME is interpolated into the
    # extractor name and the https://www.youtube.com/feed/<name> URL,
    # _PLAYLIST_TITLE becomes the title of the resulting playlist.
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = 'Youtube Subscriptions'
    IE_DESC = 'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
    # Feed URL, or the ":ytsubs" / ":ytsubscriptions" shortcut (the pattern
    # requires the leading colon).
    _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the ``history`` feed."""
    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
    # Raw string, like every other _VALID_URL in this module: in a plain
    # literal "\." is an invalid escape sequence, which newer Python
    # versions warn about.  The resulting pattern text is unchanged.
    _VALID_URL = r'https?://www\.youtube\.com/feed/history|:ythistory'
    # Consumed by the feed base class: _FEED_NAME is interpolated into the
    # extractor name and the https://www.youtube.com/feed/<name> URL,
    # _PLAYLIST_TITLE becomes the title of the resulting playlist.
    _FEED_NAME = 'history'
    _PLAYLIST_TITLE = 'Youtube History'
1916 class YoutubeTruncatedURLIE(InfoExtractor):
1917 IE_NAME = 'youtube:truncated_url'
1918 IE_DESC = False # Do not list
1919 _VALID_URL = r'''(?x)
1921 (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
1924 annotation_id=annotation_[^&]+|
1929 attribution_link\?a=[^&]+
1935 'url': 'http://www.youtube.com/watch?annotation_id=annotation_3951667041',
1936 'only_matching': True,
1938 'url': 'http://www.youtube.com/watch?',
1939 'only_matching': True,
1941 'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
1942 'only_matching': True,
1944 'url': 'https://www.youtube.com/watch?feature=foo',
1945 'only_matching': True,
1947 'url': 'https://www.youtube.com/watch?hl=en-GB',
1948 'only_matching': True,
1951 def _real_extract(self, url):
1952 raise ExtractorError(
1953 'Did you forget to quote the URL? Remember that & is a meta '
1954 'character in most shells, so you want to put the URL in quotes, '
1956 '"http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
1957 ' or simply youtube-dl BaW_jenozKc .',
1961 class YoutubeTruncatedIDIE(InfoExtractor):
1962 IE_NAME = 'youtube:truncated_id'
1963 IE_DESC = False # Do not list
1964 _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'
1967 'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
1968 'only_matching': True,
1971 def _real_extract(self, url):
1972 video_id = self._match_id(url)
1973 raise ExtractorError(
1974 'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url),