9 from .common import InfoExtractor, SearchInfoExtractor
10 from .subtitles import SubtitlesIE
16 compat_urllib_request,
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    # Endpoints used for login, language selection and age verification.
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _LANG_URL = r'https://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
    _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
    # Machine name used to look up credentials in ~/.netrc.
    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    def report_lang(self):
        """Report attempt to set language."""
        self.to_screen(u'Setting language')

    def _set_language(self):
        # Hit the language URL so the en/US preference sticks for the session;
        # a failure here is only a warning, not fatal.
        request = compat_urllib_request.Request(self._LANG_URL)
            compat_urllib_request.urlopen(request).read()
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self._downloader.report_warning(u'unable to set language: %s' % compat_str(err))

        (username, password) = self._get_login_info()
        # No authentication to be performed
            if self._LOGIN_REQUIRED:
                raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True)

        request = compat_urllib_request.Request(self._LOGIN_URL)
            login_page = compat_urllib_request.urlopen(request).read().decode('utf-8')
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self._downloader.report_warning(u'unable to fetch login page: %s' % compat_str(err))

        # Scrape the hidden GALX and dsh tokens the Google login form requires.
        match = re.search(re.compile(r'<input.+?name="GALX".+?value="(.+?)"', re.DOTALL), login_page)
        match = re.search(re.compile(r'<input.+?name="dsh".+?value="(.+?)"', re.DOTALL), login_page)

        # Fields of the login form to be posted back to Google.
                u'continue': u'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
                u'PersistentCookie': u'yes',
                u'bgresponse': u'js_disabled',
                u'checkConnection': u'',
                u'checkedDomains': u'youtube',
                u'signIn': u'Sign in',
                u'service': u'youtube',
        # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
        login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k,v in login_form_strs.items())
        login_data = compat_urllib_parse.urlencode(login_form).encode('ascii')
        request = compat_urllib_request.Request(self._LOGIN_URL, login_data)
            login_results = compat_urllib_request.urlopen(request).read().decode('utf-8')
            # Being served the login form again means the credentials were rejected.
            if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
                self._downloader.report_warning(u'unable to log in: bad username or password')
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self._downloader.report_warning(u'unable to log in: %s' % compat_str(err))

    def _confirm_age(self):
        # POST the confirmation form to clear the age-verification interstitial.
            'action_confirm': 'Confirm',
        request = compat_urllib_request.Request(self._AGE_URL, compat_urllib_parse.urlencode(age_form))
            self.report_age_confirmation()
            compat_urllib_request.urlopen(request).read().decode('utf-8')
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            raise ExtractorError(u'Unable to confirm age: %s' % compat_str(err))

    def _real_initialize(self):
        # One-time setup before extraction: needs a downloader, then sets the
        # language and logs in.
        if self._downloader is None:
        if not self._set_language():
        if not self._login():
class YoutubeSubtitlesIE(SubtitlesIE):
    """Subtitle support shared with the main YoutubeIE (which inherits it)."""

    def _get_available_subtitles(self, video_id):
        # Ask the timedtext service for the list of subtitle tracks.
        request = compat_urllib_request.Request('http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id)
            sub_list = compat_urllib_request.urlopen(request).read().decode('utf-8')
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err))
        # Each advertised track looks like name="..." ... lang_code="..".
        lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list)
            params = compat_urllib_parse.urlencode({
                'fmt': self._downloader.params.get('subtitlesformat'),
            url = u'http://www.youtube.com/api/timedtext?' + params
            sub_lang_list[lang] = url
        if not sub_lang_list:
            self._downloader.report_warning(u'video doesn\'t have subtitles')

    def _request_automatic_caption(self, video_id, webpage):
        """We need the webpage for getting the captions url, pass it as an
        argument to speed up the process."""
        # Only the first requested subtitle language is honoured here.
        sub_lang = (self._downloader.params.get('subtitleslangs') or ['en'])[0]
        sub_format = self._downloader.params.get('subtitlesformat')
        self.to_screen(u'%s: Looking for automatic captions' % video_id)
        # The caption URL lives inside the embedded ytplayer.config JSON blob.
        mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
        err_msg = u'Couldn\'t find automatic captions for "%s"' % sub_lang
            self._downloader.report_warning(err_msg)
        player_config = json.loads(mobj.group(1))
            args = player_config[u'args']
            caption_url = args[u'ttsurl']
            timestamp = args[u'timestamp']
            params = compat_urllib_parse.urlencode({
            subtitles_url = caption_url + '&' + params
            sub = self._download_webpage(subtitles_url, video_id, u'Downloading automatic captions')
            return {sub_lang: sub}
        # An extractor error can be raise by the download process if there are
        # no automatic captions but there are subtitles
        except (KeyError, ExtractorError):
            self._downloader.report_warning(err_msg)
class YoutubeIE(YoutubeSubtitlesIE, YoutubeBaseInfoExtractor):
    IE_DESC = u'YouTube.com'
    # Verbose regex covering every known URL shape that carries a video id;
    # matched elsewhere with re.VERBOSE, so the inline # notes are legal.
        (?:https?://)? # http(s):// (optional)
        (?:(?:(?:(?:\w+\.)?youtube(?:-nocookie)?\.com/|
           tube\.majestyc\.net/) # the various hostnames, with wildcard subdomains
         (?:.*?\#/)? # handle anchor (#/) redirect urls
         (?: # the various things that can precede the ID:
             (?:(?:v|embed|e)/) # v/ or embed/ or e/
             |(?: # or the v= param in all its forms
                 (?:(?:watch|movie)(?:_popup)?(?:\.php)?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
                 (?:\?|\#!?) # the params delimiter ? or # or #!
                 (?:.*?&)? # any other preceding param (like /?s=tuff&v=xxxx)
         |youtu\.be/ # just youtu.be/xxxx
         )? # all until now is optional -> you can pass the naked ID
     ([0-9A-Za-z_-]+) # here is it! the YouTube video ID
     (?(1).+)? # if we found the ID, everything can follow
    # Extracts the clean target URL out of age-verification redirect URLs.
    _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
    # Listed in order of quality
    _available_formats = ['38', '37', '46', '22', '45', '35', '44', '34', '18', '43', '6', '5', '36', '17', '13',
                          # Apple HTTP Live Streaming
                          '96', '95', '94', '93', '92', '132', '151',
                          '85', '84', '102', '83', '101', '82', '100',
                          '138', '137', '248', '136', '247', '135', '246',
                          '245', '244', '134', '243', '133', '242', '160',
                          '141', '172', '140', '171', '139',
    # Same itags, but with the free/open containers ranked first.
    _available_formats_prefer_free = ['38', '46', '37', '45', '22', '44', '35', '43', '34', '18', '6', '5', '36', '17', '13',
                                     # Apple HTTP Live Streaming
                                     '96', '95', '94', '93', '92', '132', '151',
                                     '85', '102', '84', '101', '83', '100', '82',
                                     '138', '248', '137', '247', '136', '246', '245',
                                     '244', '135', '243', '134', '242', '133', '160',
                                     '172', '141', '171', '140', '139',
    # Container name -> itags in that container, best quality first.
    _video_formats_map = {
        'flv': ['35', '34', '6', '5'],
        '3gp': ['36', '17', '13'],
        'mp4': ['38', '37', '22', '18'],
        'webm': ['46', '45', '44', '43'],
    # itag -> file extension lookup table.
    _video_extensions = {
        # Apple HTTP Live Streaming
    # itag -> human-readable dimensions lookup table.
    _video_dimensions = {
    # Integration test fixtures consumed by the project's test harness.
            u"url": u"http://www.youtube.com/watch?v=BaW_jenozKc",
            u"file": u"BaW_jenozKc.mp4",
                u"title": u"youtube-dl test video \"'/\\ä↭𝕐",
                u"uploader": u"Philipp Hagemeister",
                u"uploader_id": u"phihag",
                u"upload_date": u"20121002",
                u"description": u"test chars: \"'/\\ä↭𝕐\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de ."
            u"url": u"http://www.youtube.com/watch?v=1ltcDfZMA3U",
            u"file": u"1ltcDfZMA3U.flv",
            u"note": u"Test VEVO video (#897)",
                u"upload_date": u"20070518",
                u"title": u"Maps - It Will Find You",
                u"description": u"Music video by Maps performing It Will Find You.",
                u"uploader": u"MuteUSA",
                u"uploader_id": u"MuteUSA"
            u"url": u"http://www.youtube.com/watch?v=UxxajLWwzqY",
            u"file": u"UxxajLWwzqY.mp4",
            u"note": u"Test generic use_cipher_signature video (#897)",
                u"upload_date": u"20120506",
                u"title": u"Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]",
                u"description": u"md5:3e2666e0a55044490499ea45fe9037b7",
                u"uploader": u"Icona Pop",
                u"uploader_id": u"IconaPop"
            u"url": u"https://www.youtube.com/watch?v=07FYdnEawAQ",
            u"file": u"07FYdnEawAQ.mp4",
            u"note": u"Test VEVO video with age protection (#956)",
                u"upload_date": u"20130703",
                u"title": u"Justin Timberlake - Tunnel Vision (Explicit)",
                u"description": u"md5:64249768eec3bc4276236606ea996373",
                u"uploader": u"justintimberlakeVEVO",
                u"uploader_id": u"justintimberlakeVEVO"
            u'url': u'https://www.youtube.com/watch?v=TGi3HqYrWHE',
            u'file': u'TGi3HqYrWHE.mp4',
            u'note': u'm3u8 video',
                u'title': u'Triathlon - Men - London 2012 Olympic Games',
                u'description': u'- Men - TR02 - Triathlon - 07 August 2012 - London 2012 Olympic Games',
                u'uploader': u'olympic',
                u'upload_date': u'20120807',
                u'uploader_id': u'olympic',
                u'skip_download': True,
446 def suitable(cls, url):
447 """Receives a URL and returns True if suitable for this IE."""
448 if YoutubePlaylistIE.suitable(url): return False
449 return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
451 def report_video_webpage_download(self, video_id):
452 """Report attempt to download video webpage."""
453 self.to_screen(u'%s: Downloading video webpage' % video_id)
455 def report_video_info_webpage_download(self, video_id):
456 """Report attempt to download video info webpage."""
457 self.to_screen(u'%s: Downloading video info webpage' % video_id)
459 def report_information_extraction(self, video_id):
460 """Report attempt to extract video information."""
461 self.to_screen(u'%s: Extracting video information' % video_id)
463 def report_unavailable_format(self, video_id, format):
464 """Report extracted video URL."""
465 self.to_screen(u'%s: Format %s not available' % (video_id, format))
467 def report_rtmp_download(self):
468 """Indicate the download will use the RTMP protocol."""
469 self.to_screen(u'RTMP download detected')
    def _decrypt_signature(self, s):
        """Turn the encrypted s field into a working signature"""
        # Each return below unscrambles a signature of one specific length
        # (dispatch is on len(s), per the error message at the end); the
        # permutations presumably mirror YouTube's player code — TODO confirm.
            return s[25] + s[3:25] + s[0] + s[26:42] + s[79] + s[43:79] + s[91] + s[80:83]
            return s[25] + s[3:25] + s[2] + s[26:40] + s[77] + s[41:77] + s[89] + s[78:81]
            return s[84:78:-1] + s[87] + s[77:60:-1] + s[0] + s[59:3:-1]
            return s[7:28] + s[87] + s[29:45] + s[55] + s[46:55] + s[2] + s[56:87] + s[28]
            return s[6:27] + s[4] + s[28:39] + s[27] + s[40:59] + s[2] + s[60:]
            return s[5:34] + s[0] + s[35:38] + s[3] + s[39:45] + s[38] + s[46:53] + s[73] + s[54:73] + s[85] + s[74:85] + s[53]
            return s[83:34:-1] + s[0] + s[33:27:-1] + s[3] + s[26:19:-1] + s[34] + s[18:3:-1] + s[27]
            return s[81:36:-1] + s[0] + s[35:2:-1]
            return s[81:64:-1] + s[82] + s[63:52:-1] + s[45] + s[51:45:-1] + s[1] + s[44:1:-1] + s[0]
            return s[1:19] + s[0] + s[20:68] + s[19] + s[69:82]
            return s[56] + s[79:56:-1] + s[41] + s[55:41:-1] + s[80] + s[40:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
            return s[1:19] + s[0] + s[20:68] + s[19] + s[69:80]
            return s[54] + s[77:54:-1] + s[39] + s[53:39:-1] + s[78] + s[38:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]

            # Unknown signature length: bail out so the user can retry.
            raise ExtractorError(u'Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s)))
    def _decrypt_signature_age_gate(self, s):
        # The videos with age protection use another player, so the algorithms
        # can differ from the regular ones.
            return s[2:63] + s[82] + s[64:82] + s[63]
            # Fall back to the other algorithms
            return self._decrypt_signature(s)
    def _print_formats(self, formats):
        # Pretty-print each itag together with its container extension,
        # dimensions and any special annotation from _special_itags.
        print('Available formats:')
            print('%s\t:\t%s\t[%s]%s' %(x, self._video_extensions.get(x, 'flv'),
                                        self._video_dimensions.get(x, '???'),
                                        ' ('+self._special_itags[x]+')' if x in self._special_itags else ''))
    def _extract_id(self, url):
        """Return the video id embedded in *url* (group 2 of _VALID_URL)."""
        mobj = re.match(self._VALID_URL, url, re.VERBOSE)
            raise ExtractorError(u'Invalid URL: %s' % url)
        video_id = mobj.group(2)
    def _get_video_url_list(self, url_map):
        """
        Transform a dictionary in the format {itag:url} to a list of (itag, url)
        with the requested formats.
        """
        req_format = self._downloader.params.get('format', None)
        format_limit = self._downloader.params.get('format_limit', None)
        # Quality ordering depends on the prefer_free_formats option.
        available_formats = self._available_formats_prefer_free if self._downloader.params.get('prefer_free_formats', False) else self._available_formats
        if format_limit is not None and format_limit in available_formats:
            format_list = available_formats[available_formats.index(format_limit):]
            format_list = available_formats
        existing_formats = [x for x in format_list if x in url_map]
        if len(existing_formats) == 0:
            raise ExtractorError(u'no known formats available for video')
        if self._downloader.params.get('listformats', None):
            self._print_formats(existing_formats)
        if req_format is None or req_format == 'best':
            video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
        elif req_format == 'worst':
            video_url_list = [(existing_formats[-1], url_map[existing_formats[-1]])] # worst quality
        elif req_format in ('-1', 'all'):
            video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
            # Specific formats. We pick the first in a slash-delimited sequence.
            # Format can be specified as itag or 'mp4' or 'flv' etc. We pick the highest quality
            # available in the specified format. For example,
            # if '1/2/3/4' is requested and '2' and '4' are available, we pick '2'.
            # if '1/mp4/3/4' is requested and '1' and '5' (is a mp4) are available, we pick '1'.
            # if '1/mp4/3/4' is requested and '4' and '5' (is a mp4) are available, we pick '5'.
            req_formats = req_format.split('/')
            video_url_list = None
            for rf in req_formats:
                    video_url_list = [(rf, url_map[rf])]
                # A container name expands to its itags, best quality first.
                if rf in self._video_formats_map:
                    for srf in self._video_formats_map[rf]:
                            video_url_list = [(srf, url_map[srf])]
            if video_url_list is None:
                raise ExtractorError(u'requested format not available')
        return video_url_list
    def _extract_from_m3u8(self, manifest_url, video_id):
        """Download an HLS manifest and map each itag to its format URL."""
        def _get_urls(_manifest):
            lines = _manifest.split('\n')
            # Non-comment lines of the manifest are the format URLs.
            urls = filter(lambda l: l and not l.startswith('#'),
        manifest = self._download_webpage(manifest_url, video_id, u'Downloading formats manifest')
        formats_urls = _get_urls(manifest)
        for format_url in formats_urls:
            # The itag is embedded, URL-encoded, inside the format URL.
            itag = self._search_regex(r'itag%3D(\d+?)/', format_url, 'itag')
            url_map[itag] = format_url
    def _real_extract(self, url):
        """Extract uploader, title, thumbnail, date, description and format
        URLs for a single video page."""
        # Common shell mistake: an unquoted '&' truncated the URL.
        if re.match(r'(?:https?://)?[^/]+/watch\?feature=[a-z_]+$', url):
            self._downloader.report_warning(u'Did you forget to quote the URL? Remember that & is a meta-character in most shells, so you want to put the URL in quotes, like youtube-dl \'http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc\' (or simply youtube-dl BaW_jenozKc ).')

        # Extract original video URL from URL with redirection, like age verification, using next_url parameter
        mobj = re.search(self._NEXT_URL_RE, url)
            url = 'https://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/')
        video_id = self._extract_id(url)

        # Get video webpage
        self.report_video_webpage_download(video_id)
        url = 'https://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
        request = compat_urllib_request.Request(url)
            video_webpage_bytes = compat_urllib_request.urlopen(request).read()
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            raise ExtractorError(u'Unable to download video webpage: %s' % compat_str(err))

        video_webpage = video_webpage_bytes.decode('utf-8', 'ignore')

        # Attempt to extract SWF player URL
        mobj = re.search(r'swfConfig.*?"(http:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
            # Un-escape the JS-escaped URL.
            player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))

        # Get video info
        self.report_video_info_webpage_download(video_id)
        if re.search(r'player-age-gate-content">', video_webpage) is not None:
            self.report_age_confirmation()
            # We simulate the access to the video from www.youtube.com/v/{video_id}
            # this can be viewed without login into Youtube
            data = compat_urllib_parse.urlencode({'video_id': video_id,
                                                  'eurl': 'https://youtube.googleapis.com/v/' + video_id,
            video_info_url = 'https://www.youtube.com/get_video_info?' + data
            video_info_webpage = self._download_webpage(video_info_url, video_id,
                                                        errnote='unable to download video info webpage')
            video_info = compat_parse_qs(video_info_webpage)
            # Try several 'el' variants until one response contains a token.
            for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
                video_info_url = ('https://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
                                  % (video_id, el_type))
                video_info_webpage = self._download_webpage(video_info_url, video_id,
                                                            errnote='unable to download video info webpage')
                video_info = compat_parse_qs(video_info_webpage)
                if 'token' in video_info:
        if 'token' not in video_info:
            if 'reason' in video_info:
                raise ExtractorError(u'YouTube said: %s' % video_info['reason'][0], expected=True)
                raise ExtractorError(u'"token" parameter not in video info for unknown reason')

        # Check for "rental" videos
        if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
            raise ExtractorError(u'"rental" videos not supported')

        # Start extracting information
        self.report_information_extraction(video_id)

        # uploader
        if 'author' not in video_info:
            raise ExtractorError(u'Unable to extract uploader name')
        video_uploader = compat_urllib_parse.unquote_plus(video_info['author'][0])

        # uploader_id
        video_uploader_id = None
        mobj = re.search(r'<link itemprop="url" href="http://www.youtube.com/(?:user|channel)/([^"]+)">', video_webpage)
            video_uploader_id = mobj.group(1)
            self._downloader.report_warning(u'unable to extract uploader nickname')

        # title
        if 'title' not in video_info:
            raise ExtractorError(u'Unable to extract video title')
        video_title = compat_urllib_parse.unquote_plus(video_info['title'][0])

        # thumbnail image
        # We try first to get a high quality image:
        m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
                            video_webpage, re.DOTALL)
        if m_thumb is not None:
            video_thumbnail = m_thumb.group(1)
        elif 'thumbnail_url' not in video_info:
            self._downloader.report_warning(u'unable to extract video thumbnail')
        else:   # don't panic if we can't find it
            video_thumbnail = compat_urllib_parse.unquote_plus(video_info['thumbnail_url'][0])

        # upload date
        mobj = re.search(r'id="eow-date.*?>(.*?)</span>', video_webpage, re.DOTALL)
            # Normalize separators/whitespace before parsing the date.
            upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
            upload_date = unified_strdate(upload_date)

        # description
        video_description = get_element_by_id("eow-description", video_webpage)
        if video_description:
            video_description = clean_html(video_description)
            # Fall back to the meta description tag.
            fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
                video_description = unescapeHTML(fd_mobj.group(1))
                video_description = u''

        # subtitles
        video_subtitles = None

        if self._downloader.params.get('writesubtitles', False) or self._downloader.params.get('allsubtitles', False):
            video_subtitles = self._extract_subtitles(video_id)
        elif self._downloader.params.get('writeautomaticsub', False):
            video_subtitles = self._request_automatic_caption(video_id, video_webpage)

        if self._downloader.params.get('listsubtitles', False):
            self._list_available_subtitles(video_id)

        # duration
        if 'length_seconds' not in video_info:
            self._downloader.report_warning(u'unable to extract video duration')
        video_duration = compat_urllib_parse.unquote_plus(video_info['length_seconds'][0])

        # Decide which formats to download

            mobj = re.search(r';ytplayer.config = ({.*?});', video_webpage)
                raise ValueError('Could not find vevo ID')
            info = json.loads(mobj.group(1))
            # Easy way to know if the 's' value is in url_encoded_fmt_stream_map
            # this signatures are encrypted
            m_s = re.search(r'[&,]s=', args['url_encoded_fmt_stream_map'])
                self.to_screen(u'%s: Encrypted signatures detected.' % video_id)
                video_info['url_encoded_fmt_stream_map'] = [args['url_encoded_fmt_stream_map']]
            m_s = re.search(r'[&,]s=', args.get('adaptive_fmts', u''))
                if 'url_encoded_fmt_stream_map' in video_info:
                    video_info['url_encoded_fmt_stream_map'][0] += ',' + args['adaptive_fmts']
                    video_info['url_encoded_fmt_stream_map'] = [args['adaptive_fmts']]
            elif 'adaptive_fmts' in video_info:
                if 'url_encoded_fmt_stream_map' in video_info:
                    video_info['url_encoded_fmt_stream_map'][0] += ',' + video_info['adaptive_fmts'][0]
                    video_info['url_encoded_fmt_stream_map'] = video_info['adaptive_fmts']

        if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
            self.report_rtmp_download()
            video_url_list = [(None, video_info['conn'][0])]
        elif 'url_encoded_fmt_stream_map' in video_info and len(video_info['url_encoded_fmt_stream_map']) >= 1:
            if 'rtmpe%3Dyes' in video_info['url_encoded_fmt_stream_map'][0]:
                raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
            for url_data_str in video_info['url_encoded_fmt_stream_map'][0].split(','):
                url_data = compat_parse_qs(url_data_str)
                if 'itag' in url_data and 'url' in url_data:
                    url = url_data['url'][0]
                    if 'sig' in url_data:
                        # Signature already in the clear: append it directly.
                        url += '&signature=' + url_data['sig'][0]
                    elif 's' in url_data:
                        if self._downloader.params.get('verbose'):
                            # Report which player produced the encrypted signature.
                            player_version = self._search_regex(r'ad3-(.+?)\.swf',
                                video_info['ad3_module'][0] if 'ad3_module' in video_info else 'NOT FOUND',
                                'flash player', fatal=False)
                            player = 'flash player %s' % player_version
                            player = u'html5 player %s' % self._search_regex(r'html5player-(.+?)\.js', video_webpage,
                                'html5 player', fatal=False)
                            parts_sizes = u'.'.join(compat_str(len(part)) for part in s.split('.'))
                            self.to_screen(u'encrypted signature length %d (%s), itag %s, %s' %
                                (len(s), parts_sizes, url_data['itag'][0], player))
                        encrypted_sig = url_data['s'][0]
                            signature = self._decrypt_signature_age_gate(encrypted_sig)
                            signature = self._decrypt_signature(encrypted_sig)
                        url += '&signature=' + signature
                    if 'ratebypass' not in url:
                        url += '&ratebypass=yes'
                    url_map[url_data['itag'][0]] = url
            video_url_list = self._get_video_url_list(url_map)
            if not video_url_list:
        elif video_info.get('hlsvp'):
            # HTTP Live Streaming: the formats come from the m3u8 manifest.
            manifest_url = video_info['hlsvp'][0]
            url_map = self._extract_from_m3u8(manifest_url, video_id)
            video_url_list = self._get_video_url_list(url_map)
            if not video_url_list:
            raise ExtractorError(u'no conn or url_encoded_fmt_stream_map information found in video info')

        # One result dict per selected format.
        for format_param, video_real_url in video_url_list:
            # Extension
            video_extension = self._video_extensions.get(format_param, 'flv')

            video_format = '{0} - {1}{2}'.format(format_param if format_param else video_extension,
                                                 self._video_dimensions.get(format_param, '???'),
                                                 ' ('+self._special_itags[format_param]+')' if format_param in self._special_itags else '')

                'url': video_real_url,
                'uploader': video_uploader,
                'uploader_id': video_uploader_id,
                'upload_date': upload_date,
                'title': video_title,
                'ext': video_extension,
                'format': video_format,
                'thumbnail': video_thumbnail,
                'description': video_description,
                'player_url': player_url,
                'subtitles': video_subtitles,
                'duration': video_duration
class YoutubePlaylistIE(InfoExtractor):
    IE_DESC = u'YouTube.com playlists'
    # Fragments of the verbose playlist URL regex (matched with re.VERBOSE).
        (?:course|view_play_list|my_playlists|artist|playlist|watch)
        \? (?:.*?&)*? (?:p|a|list)=
        ((?:PL|EC|UU|FL)?[0-9A-Za-z-_]{10,})
        ((?:PL|EC|UU|FL)[0-9A-Za-z-_]{10,})
    # GData API endpoint; filled with (playlist_id, max_results, start_index).
    _TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/playlists/%s?max-results=%i&start-index=%i&v=2&alt=json&safeSearch=none'
    IE_NAME = u'youtube:playlist'

    def suitable(cls, url):
        """Receives a URL and returns True if suitable for this IE."""
        return re.match(cls._VALID_URL, url, re.VERBOSE) is not None

    def _real_extract(self, url):
        # Extract playlist id
        mobj = re.match(self._VALID_URL, url, re.VERBOSE)
            raise ExtractorError(u'Invalid URL: %s' % url)

        # Download playlist videos from API
        playlist_id = mobj.group(1) or mobj.group(2)
        for page_num in itertools.count(1):
            start_index = self._MAX_RESULTS * (page_num - 1) + 1
            # The GData API refuses start indices of 1000 and beyond.
            if start_index >= 1000:
                self._downloader.report_warning(u'Max number of results reached')
            url = self._TEMPLATE_URL % (playlist_id, self._MAX_RESULTS, start_index)
            page = self._download_webpage(url, playlist_id, u'Downloading page #%s' % page_num)
                response = json.loads(page)
            except ValueError as err:
                raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))

            if 'feed' not in response:
                raise ExtractorError(u'Got a malformed response from YouTube API')
            playlist_title = response['feed']['title']['$t']
            if 'entry' not in response['feed']:
                # Number of videos is a multiple of self._MAX_RESULTS
            for entry in response['feed']['entry']:
                index = entry['yt$position']['$t']
                if 'media$group' in entry and 'yt$videoid' in entry['media$group']:
                        'https://www.youtube.com/watch?v=' + entry['media$group']['yt$videoid']['$t']

        # Restore playlist order, then drop the position index.
        videos = [v[1] for v in sorted(videos)]

        url_results = [self.url_result(vurl, 'Youtube') for vurl in videos]
        return [self.playlist_result(url_results, playlist_id, playlist_title)]
class YoutubeChannelIE(InfoExtractor):
    IE_DESC = u'YouTube.com channels'
    _VALID_URL = r"^(?:https?://)?(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/([0-9A-Za-z_-]+)"
    _TEMPLATE_URL = 'http://www.youtube.com/channel/%s/videos?sort=da&flow=list&view=0&page=%s&gl=US&hl=en'
    # Marker string whose presence signals another page of videos exists.
    _MORE_PAGES_INDICATOR = 'yt-uix-load-more'
    _MORE_PAGES_URL = 'http://www.youtube.com/c4_browse_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s'
    IE_NAME = u'youtube:channel'

    def extract_videos_from_page(self, page):
        """Collect the distinct video ids linked from a channel page."""
        for mobj in re.finditer(r'href="/watch\?v=([0-9A-Za-z_-]+)&?', page):
            if mobj.group(1) not in ids_in_page:
                ids_in_page.append(mobj.group(1))

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
            raise ExtractorError(u'Invalid URL: %s' % url)

        # Download channel page
        channel_id = mobj.group(1)
        url = self._TEMPLATE_URL % (channel_id, pagenum)
        page = self._download_webpage(url, channel_id,
                                      u'Downloading page #%s' % pagenum)

        # Extract video identifiers
        ids_in_page = self.extract_videos_from_page(page)
        video_ids.extend(ids_in_page)

        # Download any subsequent channel pages using the json-based channel_ajax query
        if self._MORE_PAGES_INDICATOR in page:
            for pagenum in itertools.count(1):
                url = self._MORE_PAGES_URL % (pagenum, channel_id)
                page = self._download_webpage(url, channel_id,
                                              u'Downloading page #%s' % pagenum)
                page = json.loads(page)
                ids_in_page = self.extract_videos_from_page(page['content_html'])
                video_ids.extend(ids_in_page)
                # Stop when the load-more widget no longer offers another page.
                if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']:
        self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))

        urls = ['http://www.youtube.com/watch?v=%s' % id for id in video_ids]
        url_entries = [self.url_result(eurl, 'Youtube') for eurl in urls]
        return [self.playlist_result(url_entries, channel_id)]
class YoutubeUserIE(InfoExtractor):
    IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)'
    # (?!feed/) keeps this from swallowing the feed URLs handled elsewhere.
    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?)|ytuser:)(?!feed/)([A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
    # GData caps results per request, so uploads are fetched page by page.
    _GDATA_PAGE_SIZE = 50
    _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'
    IE_NAME = u'youtube:user'

    def suitable(cls, url):
        # Don't return True if the url can be extracted with another youtube
        # extractor; this regex is too permissive and it would match.
        other_ies = iter(klass for (name, klass) in globals().items() if name.endswith('IE') and klass is not cls)
        if any(ie.suitable(url) for ie in other_ies): return False
        else: return super(YoutubeUserIE, cls).suitable(url)

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
            raise ExtractorError(u'Invalid URL: %s' % url)

        username = mobj.group(1)

        # Download video ids using YouTube Data API. Result size per
        # query is limited (currently to 50 videos) so we need to query
        # page by page until there are no video ids - it means we got
        for pagenum in itertools.count(0):
            start_index = pagenum * self._GDATA_PAGE_SIZE + 1

            gdata_url = self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index)
            page = self._download_webpage(gdata_url, username,
                                          u'Downloading video ids from %d to %d' % (start_index, start_index + self._GDATA_PAGE_SIZE))

                response = json.loads(page)
            except ValueError as err:
                raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))

            # Extract video identifiers
            for entry in response['feed']['entry']:
                # The id feed field ends with the plain video id.
                ids_in_page.append(entry['id']['$t'].split('/')[-1])
            video_ids.extend(ids_in_page)

            # A little optimization - if current page is not
            # "full", ie. does not contain PAGE_SIZE video ids then
            # we can assume that this page is the last one - there
            # are no more ids on further pages - no need to query
            if len(ids_in_page) < self._GDATA_PAGE_SIZE:

        urls = ['http://www.youtube.com/watch?v=%s' % video_id for video_id in video_ids]
        url_results = [self.url_result(rurl, 'Youtube') for rurl in urls]
        return [self.playlist_result(url_results, playlist_title = username)]
class YoutubeSearchIE(SearchInfoExtractor):
    IE_DESC = u'YouTube.com searches'
    # GData search endpoint; filled with (quoted query, start index).
    _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc'
    IE_NAME = u'youtube:search'
    # Prefix that triggers this extractor (e.g. "ytsearch10:query").
    _SEARCH_KEY = 'ytsearch'

    def report_download_page(self, query, pagenum):
        """Report attempt to download search page with given number."""
        self._downloader.to_screen(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        # Pages hold 50 results each; keep going until the limit is covered.
        while (50 * pagenum) < limit:
            self.report_download_page(query, pagenum+1)
            result_url = self._API_URL % (compat_urllib_parse.quote_plus(query), (50*pagenum)+1)
            request = compat_urllib_request.Request(result_url)
                data = compat_urllib_request.urlopen(request).read().decode('utf-8')
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                raise ExtractorError(u'Unable to download API page: %s' % compat_str(err))
            api_response = json.loads(data)['data']
            if not 'items' in api_response:
                raise ExtractorError(u'[youtube] No video results')
            new_ids = list(video['id'] for video in api_response['items'])
            video_ids += new_ids
            # Never ask for more than the service reports to exist.
            limit = min(n, api_response['totalItems'])
        if len(video_ids) > n:
            video_ids = video_ids[:n]
        videos = [self.url_result('http://www.youtube.com/watch?v=%s' % id, 'Youtube') for id in video_ids]
        return self.playlist_result(videos, query)
class YoutubeShowIE(InfoExtractor):
    """Resolve a YouTube show page into one playlist result per season."""
    IE_NAME = u'youtube:show'
    IE_DESC = u'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://www\.youtube\.com/show/(.*)'

    def _real_extract(self, url):
        show_name = re.match(self._VALID_URL, url).group(1)
        webpage = self._download_webpage(url, show_name, u'Downloading show webpage')
        # Each season of the show is exposed as its own playlist link.
        season_matches = list(re.finditer(r'href="(/playlist\?list=.*?)"', webpage))
        self.to_screen(u'%s: Found %s seasons' % (show_name, len(season_matches)))
        results = []
        for season in season_matches:
            season_url = 'https://www.youtube.com' + season.group(1)
            results.append(self.url_result(season_url, 'YoutubePlaylist'))
        return results
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Base class for extractors that fetch info from
    http://www.youtube.com/feed_ajax
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
    """
    # Feeds are per-account, so credentials are mandatory.
    _LOGIN_REQUIRED = True
    # use action_load_personal_feed instead of action_load_system_feed
    _PERSONAL_FEED = False

    def _FEED_TEMPLATE(self):
        # URL template for one page of the feed; %%s is filled with the
        # paging offset by _real_extract.
        action = 'action_load_system_feed'
        if self._PERSONAL_FEED:
            action = 'action_load_personal_feed'
        return 'http://www.youtube.com/feed_ajax?%s=1&feed_name=%s&paging=%%s' % (action, self._FEED_NAME)

        # IE name derived from the subclass's feed name.
        return u'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        """Initialization hook; these feeds require login (_LOGIN_REQUIRED)."""

    def _real_extract(self, url):
        # The step argument is available only in 2.7 or higher
        for i in itertools.count(0):
            paging = i*self._PAGING_STEP
            info = self._download_webpage(self._FEED_TEMPLATE % paging,
                                          u'%s feed' % self._FEED_NAME,
                                          u'Downloading page %s' % i)
            info = json.loads(info)
            feed_html = info['feed_html']
            m_ids = re.finditer(r'"/watch\?v=(.*?)["&]', feed_html)
            # Preserve feed order while dropping duplicate ids.
            ids = orderedSet(m.group(1) for m in m_ids)
            feed_entries.extend(self.url_result(id, 'Youtube') for id in ids)
            if info['paging'] is None:
        return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE)
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Extract the authenticated user's subscriptions feed."""
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = u'Youtube Subscriptions'
    _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
    IE_DESC = u'YouTube.com subscriptions feed, "ytsubs" keyword(requires authentication)'
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Extract the authenticated user's recommended-videos feed."""
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = u'Youtube Recommended videos'
    _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?'
    IE_DESC = u'YouTube.com recommended videos, "ytrec" keyword (requires authentication)'
class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor):
    """Extract the authenticated user's watch-later list (a personal feed)."""
    _PERSONAL_FEED = True
    _FEED_NAME = 'watch_later'
    _PLAYLIST_TITLE = u'Youtube Watch Later'
    _VALID_URL = r'https?://www\.youtube\.com/feed/watch_later|:ytwatchlater'
    IE_DESC = u'Youtube watch later list, "ytwatchlater" keyword (requires authentication)'
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Resolve the user's favourites page to its backing playlist and
    delegate extraction to the playlist extractor."""
    _LOGIN_REQUIRED = True
    IE_NAME = u'youtube:favorites'
    IE_DESC = u'YouTube.com favourite videos, "ytfav" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:ou?rites)?'

    def _real_extract(self, url):
        # The favourites page embeds the id of the playlist that backs it.
        page = self._download_webpage('https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        favourites_list_id = self._search_regex(r'list=(.+?)["&]', page, u'favourites playlist id')
        return self.url_result(favourites_list_id, 'YoutubePlaylist')