X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fyoutube.py;h=414e33b498e30dbe1f14dc5f2ea421ddcd4829ac;hb=8377574c9cb8740e24d45e9b3d30921fd6ec846c;hp=def6f7a74e951882403deff45603a214924ba9a8;hpb=2eb88d953f25e178881c1415c68fea1f770a7ee6;p=youtube-dl diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index def6f7a74..414e33b49 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -4,8 +4,10 @@ import json import netrc import re import socket +import itertools from .common import InfoExtractor, SearchInfoExtractor +from .subtitles import SubtitlesIE from ..utils import ( compat_http_client, compat_parse_qs, @@ -19,12 +21,71 @@ from ..utils import ( ExtractorError, unescapeHTML, unified_strdate, + orderedSet, ) -class YoutubeIE(InfoExtractor): - """Information extractor for youtube.com.""" +class YoutubeSubtitlesIE(SubtitlesIE): + def _get_available_subtitles(self, video_id): + request = compat_urllib_request.Request('http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id) + try: + sub_list = compat_urllib_request.urlopen(request).read().decode('utf-8') + except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: + self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err)) + return {} + lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list) + + sub_lang_list = {} + for l in lang_list: + lang = l[1] + params = compat_urllib_parse.urlencode({ + 'lang': lang, + 'v': video_id, + 'fmt': self._downloader.params.get('subtitlesformat'), + }) + url = u'http://www.youtube.com/api/timedtext?' + params + sub_lang_list[lang] = url + if not sub_lang_list: + self._downloader.report_warning(u'video doesn\'t have subtitles') + return {} + return sub_lang_list + + def _request_automatic_caption(self, video_id, webpage): + """We need the webpage for getting the captions url, pass it as an + argument to speed up the process.""" + sub_lang = self._downloader.params.get('subtitleslang') or 'en' + sub_format = self._downloader.params.get('subtitlesformat') + self.to_screen(u'%s: Looking for automatic captions' % video_id) + mobj = re.search(r';ytplayer.config = ({.*?});', webpage) + err_msg = u'Couldn\'t find automatic captions for "%s"' % sub_lang + if mobj is None: + self._downloader.report_warning(err_msg) + return {} + player_config = json.loads(mobj.group(1)) + try: + args = player_config[u'args'] + caption_url = args[u'ttsurl'] + timestamp = args[u'timestamp'] + params = compat_urllib_parse.urlencode({ + 'lang': 'en', + 'tlang': sub_lang, + 'fmt': sub_format, + 'ts': timestamp, + 'kind': 'asr', + }) + subtitles_url = caption_url + '&' + params + sub = self._download_webpage(subtitles_url, video_id, u'Downloading automatic captions') + return {sub_lang: sub} + # An extractor error can be raise by the download process if there are + # no automatic captions but there are subtitles + except (KeyError, ExtractorError): + self._downloader.report_warning(err_msg) + return {} + + +class YoutubeIE(YoutubeSubtitlesIE): + IE_DESC = u'YouTube.com' _VALID_URL = r"""^ ( (?:https?://)? # http(s):// (optional) @@ -34,7 +95,7 @@ class YoutubeIE(InfoExtractor): (?: # the various things that can precede the ID: (?:(?:v|embed|e)/) # v/ or embed/ or e/ |(?: # or the v= param in all its forms - (?:watch(?:_popup)?(?:\.php)?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx) + (?:watch|movie(?:_popup)?(?:\.php)?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx) (?:\?|\#!?) # the params delimiter ? or # or #! (?:.*?&)? # any other preceding param (like /?s=tuff&v=xxxx) v= @@ -83,57 +144,65 @@ class YoutubeIE(InfoExtractor): IE_NAME = u'youtube' _TESTS = [ { - "url": "http://www.youtube.com/watch?v=BaW_jenozKc", - "file": "BaW_jenozKc.mp4", - "info_dict": { - "title": "youtube-dl test video \"'/\\ä↭𝕐", - "uploader": "Philipp Hagemeister", - "uploader_id": "phihag", - "upload_date": "20121002", - "description": "test chars: \"'/\\ä↭𝕐\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de ." + u"url": u"http://www.youtube.com/watch?v=BaW_jenozKc", + u"file": u"BaW_jenozKc.mp4", + u"info_dict": { + u"title": u"youtube-dl test video \"'/\\ä↭𝕐", + u"uploader": u"Philipp Hagemeister", + u"uploader_id": u"phihag", + u"upload_date": u"20121002", + u"description": u"test chars: \"'/\\ä↭𝕐\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de ." } - }, - { - "url": "http://www.youtube.com/watch?v=1ltcDfZMA3U", - "file": "1ltcDfZMA3U.flv", - "note": "Test VEVO video (#897)", - "info_dict": { - "upload_date": "20070518", - "title": "Maps - It Will Find You", - "description": "Music video by Maps performing It Will Find You.", - "uploader": "MuteUSA", - "uploader_id": "MuteUSA" + }, + { + u"url": u"http://www.youtube.com/watch?v=1ltcDfZMA3U", + u"file": u"1ltcDfZMA3U.flv", + u"note": u"Test VEVO video (#897)", + u"info_dict": { + u"upload_date": u"20070518", + u"title": u"Maps - It Will Find You", + u"description": u"Music video by Maps performing It Will Find You.", + u"uploader": u"MuteUSA", + u"uploader_id": u"MuteUSA" + } + }, + { + u"url": u"http://www.youtube.com/watch?v=UxxajLWwzqY", + u"file": u"UxxajLWwzqY.mp4", + u"note": u"Test generic use_cipher_signature video (#897)", + u"info_dict": { + u"upload_date": u"20120506", + u"title": u"Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]", + u"description": u"md5:b085c9804f5ab69f4adea963a2dceb3c", + u"uploader": u"IconaPop", + u"uploader_id": u"IconaPop" } - }, - { - "url": "http://www.youtube.com/watch?v=UxxajLWwzqY", - "file": "UxxajLWwzqY.mp4", - "note": "Test generic use_cipher_signature video (#897)", - "info_dict": { - "upload_date": "20120506", - "title": "Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]", - "description": "md5:b085c9804f5ab69f4adea963a2dceb3c", - "uploader": "IconaPop", - "uploader_id": "IconaPop" + }, + { + u"url": u"https://www.youtube.com/watch?v=07FYdnEawAQ", + u"file": u"07FYdnEawAQ.mp4", + u"note": u"Test VEVO video with age protection (#956)", + u"info_dict": { + u"upload_date": u"20130703", + u"title": u"Justin Timberlake - Tunnel Vision (Explicit)", + u"description": u"md5:64249768eec3bc4276236606ea996373", + u"uploader": u"justintimberlakeVEVO", + u"uploader_id": u"justintimberlakeVEVO" } - } + }, ] @classmethod def suitable(cls, url): """Receives a URL and returns True if suitable for this IE.""" - if YoutubePlaylistIE.suitable(url): return False + if YoutubePlaylistIE.suitable(url) or YoutubeSubscriptionsIE.suitable(url): return False return re.match(cls._VALID_URL, url, re.VERBOSE) is not None def report_lang(self): """Report attempt to set language.""" self.to_screen(u'Setting language') - def report_login(self): - """Report attempt to log in.""" - self.to_screen(u'Logging in') - def report_video_webpage_download(self, video_id): """Report attempt to download video webpage.""" self.to_screen(u'%s: Downloading video webpage' % video_id) @@ -142,19 +211,6 @@ class YoutubeIE(InfoExtractor): """Report attempt to download video info webpage.""" self.to_screen(u'%s: Downloading video info webpage' % video_id) - def report_video_subtitles_download(self, video_id): - """Report attempt to download video info webpage.""" - self.to_screen(u'%s: Checking available subtitles' % video_id) - - def report_video_subtitles_request(self, video_id, sub_lang, format): - """Report attempt to download video info webpage.""" - self.to_screen(u'%s: Downloading video subtitles for %s.%s' % (video_id, sub_lang, format)) - - def report_video_subtitles_available(self, video_id, sub_lang_list): - """Report available subtitles.""" - sub_lang = ",".join(list(sub_lang_list.keys())) - self.to_screen(u'%s: Available subtitles for video: %s' % (video_id, sub_lang)) - def report_information_extraction(self, video_id): """Report attempt to extract video information.""" self.to_screen(u'%s: Extracting video information' % video_id) @@ -168,124 +224,32 @@ class YoutubeIE(InfoExtractor): self.to_screen(u'RTMP download detected') def _decrypt_signature(self, s): - """Decrypt the key""" + """Turn the encrypted s field into a working signature""" - if len(s) == 88: + if len(s) == 92: + return s[25] + s[3:25] + s[0] + s[26:42] + s[79] + s[43:79] + s[91] + s[80:83] + elif len(s) == 90: + return s[25] + s[3:25] + s[2] + s[26:40] + s[77] + s[41:77] + s[89] + s[78:81] + elif len(s) == 88: return s[48] + s[81:67:-1] + s[82] + s[66:62:-1] + s[85] + s[61:48:-1] + s[67] + s[47:12:-1] + s[3] + s[11:3:-1] + s[2] + s[12] elif len(s) == 87: return s[62] + s[82:62:-1] + s[83] + s[61:52:-1] + s[0] + s[51:2:-1] elif len(s) == 86: return s[2:63] + s[82] + s[64:82] + s[63] elif len(s) == 85: - return s[76] + s[82:76:-1] + s[83] + s[75:60:-1] + s[0] + s[59:50:-1] + s[1] + s[49:2:-1] + return s[2:8] + s[0] + s[9:21] + s[65] + s[22:65] + s[84] + s[66:82] + s[21] elif len(s) == 84: return s[83:36:-1] + s[2] + s[35:26:-1] + s[3] + s[25:3:-1] + s[26] elif len(s) == 83: - return s[52] + s[81:55:-1] + s[2] + s[54:52:-1] + s[82] + s[51:36:-1] + s[55] + s[35:2:-1] + s[36] + return s[6] + s[3:6] + s[33] + s[7:24] + s[0] + s[25:33] + s[53] + s[34:53] + s[24] + s[54:] elif len(s) == 82: return s[36] + s[79:67:-1] + s[81] + s[66:40:-1] + s[33] + s[39:36:-1] + s[40] + s[35] + s[0] + s[67] + s[32:0:-1] + s[34] + elif len(s) == 81: + return s[6] + s[3:6] + s[33] + s[7:24] + s[0] + s[25:33] + s[2] + s[34:53] + s[24] + s[54:81] else: raise ExtractorError(u'Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s))) - def _get_available_subtitles(self, video_id): - self.report_video_subtitles_download(video_id) - request = compat_urllib_request.Request('http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id) - try: - sub_list = compat_urllib_request.urlopen(request).read().decode('utf-8') - except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: - return (u'unable to download video subtitles: %s' % compat_str(err), None) - sub_lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list) - sub_lang_list = dict((l[1], l[0]) for l in sub_lang_list) - if not sub_lang_list: - return (u'video doesn\'t have subtitles', None) - return sub_lang_list - - def _list_available_subtitles(self, video_id): - sub_lang_list = self._get_available_subtitles(video_id) - self.report_video_subtitles_available(video_id, sub_lang_list) - - def _request_subtitle(self, sub_lang, sub_name, video_id, format): - """ - Return tuple: - (error_message, sub_lang, sub) - """ - self.report_video_subtitles_request(video_id, sub_lang, format) - params = compat_urllib_parse.urlencode({ - 'lang': sub_lang, - 'name': sub_name, - 'v': video_id, - 'fmt': format, - }) - url = 'http://www.youtube.com/api/timedtext?' + params - try: - sub = compat_urllib_request.urlopen(url).read().decode('utf-8') - except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: - return (u'unable to download video subtitles: %s' % compat_str(err), None, None) - if not sub: - return (u'Did not fetch video subtitles', None, None) - return (None, sub_lang, sub) - - def _request_automatic_caption(self, video_id, webpage): - """We need the webpage for getting the captions url, pass it as an - argument to speed up the process.""" - sub_lang = self._downloader.params.get('subtitleslang') or 'en' - sub_format = self._downloader.params.get('subtitlesformat') - self.to_screen(u'%s: Looking for automatic captions' % video_id) - mobj = re.search(r';ytplayer.config = ({.*?});', webpage) - err_msg = u'Couldn\'t find automatic captions for "%s"' % sub_lang - if mobj is None: - return [(err_msg, None, None)] - player_config = json.loads(mobj.group(1)) - try: - args = player_config[u'args'] - caption_url = args[u'ttsurl'] - timestamp = args[u'timestamp'] - params = compat_urllib_parse.urlencode({ - 'lang': 'en', - 'tlang': sub_lang, - 'fmt': sub_format, - 'ts': timestamp, - 'kind': 'asr', - }) - subtitles_url = caption_url + '&' + params - sub = self._download_webpage(subtitles_url, video_id, u'Downloading automatic captions') - return [(None, sub_lang, sub)] - except KeyError: - return [(err_msg, None, None)] - - def _extract_subtitle(self, video_id): - """ - Return a list with a tuple: - [(error_message, sub_lang, sub)] - """ - sub_lang_list = self._get_available_subtitles(video_id) - sub_format = self._downloader.params.get('subtitlesformat') - if isinstance(sub_lang_list,tuple): #There was some error, it didn't get the available subtitles - return [(sub_lang_list[0], None, None)] - if self._downloader.params.get('subtitleslang', False): - sub_lang = self._downloader.params.get('subtitleslang') - elif 'en' in sub_lang_list: - sub_lang = 'en' - else: - sub_lang = list(sub_lang_list.keys())[0] - if not sub_lang in sub_lang_list: - return [(u'no closed captions found in the specified language "%s"' % sub_lang, None, None)] - - subtitle = self._request_subtitle(sub_lang, sub_lang_list[sub_lang].encode('utf-8'), video_id, sub_format) - return [subtitle] - - def _extract_all_subtitles(self, video_id): - sub_lang_list = self._get_available_subtitles(video_id) - sub_format = self._downloader.params.get('subtitlesformat') - if isinstance(sub_lang_list,tuple): #There was some error, it didn't get the available subtitles - return [(sub_lang_list[0], None, None)] - subtitles = [] - for sub_lang in sub_lang_list: - subtitle = self._request_subtitle(sub_lang, sub_lang_list[sub_lang].encode('utf-8'), video_id, sub_format) - subtitles.append(subtitle) - return subtitles - def _print_formats(self, formats): print('Available formats:') for x in formats: @@ -295,26 +259,6 @@ class YoutubeIE(InfoExtractor): if self._downloader is None: return - username = None - password = None - downloader_params = self._downloader.params - - # Attempt to use provided username and password or .netrc data - if downloader_params.get('username', None) is not None: - username = downloader_params['username'] - password = downloader_params['password'] - elif downloader_params.get('usenetrc', False): - try: - info = netrc.netrc().authenticators(self._NETRC_MACHINE) - if info is not None: - username = info[0] - password = info[2] - else: - raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE) - except (IOError, netrc.NetrcParseError) as err: - self._downloader.report_warning(u'parsing .netrc: %s' % compat_str(err)) - return - # Set language request = compat_urllib_request.Request(self._LANG_URL) try: @@ -324,6 +268,8 @@ class YoutubeIE(InfoExtractor): self._downloader.report_warning(u'unable to set language: %s' % compat_str(err)) return + (username, password) = self._get_login_info() + # No authentication to be performed if username is None: return @@ -402,6 +348,9 @@ class YoutubeIE(InfoExtractor): return video_id def _real_extract(self, url): + if re.match(r'(?:https?://)?[^/]+/watch\?feature=[a-z_]+$', url): + self._downloader.report_warning(u'Did you forget to quote the URL? Remember that & is a meta-character in most shells, so you want to put the URL in quotes, like youtube-dl \'http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc\' (or simply youtube-dl BaW_jenozKc ).') + # Extract original video URL from URL with redirection, like age verification, using next_url parameter mobj = re.search(self._NEXT_URL_RE, url) if mobj: @@ -428,18 +377,38 @@ class YoutubeIE(InfoExtractor): # Get video info self.report_video_info_webpage_download(video_id) - for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']: - video_info_url = ('https://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en' - % (video_id, el_type)) + if re.search(r'player-age-gate-content">', video_webpage) is not None: + self.report_age_confirmation() + age_gate = True + # We simulate the access to the video from www.youtube.com/v/{video_id} + # this can be viewed without login into Youtube + data = compat_urllib_parse.urlencode({'video_id': video_id, + 'el': 'embedded', + 'gl': 'US', + 'hl': 'en', + 'eurl': 'https://youtube.googleapis.com/v/' + video_id, + 'asv': 3, + 'sts':'1588', + }) + video_info_url = 'https://www.youtube.com/get_video_info?' + data video_info_webpage = self._download_webpage(video_info_url, video_id, note=False, errnote='unable to download video info webpage') video_info = compat_parse_qs(video_info_webpage) - if 'token' in video_info: - break + else: + age_gate = False + for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']: + video_info_url = ('https://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en' + % (video_id, el_type)) + video_info_webpage = self._download_webpage(video_info_url, video_id, + note=False, + errnote='unable to download video info webpage') + video_info = compat_parse_qs(video_info_webpage) + if 'token' in video_info: + break if 'token' not in video_info: if 'reason' in video_info: - raise ExtractorError(u'YouTube said: %s' % video_info['reason'][0]) + raise ExtractorError(u'YouTube said: %s' % video_info['reason'][0], expected=True) else: raise ExtractorError(u'"token" parameter not in video info for unknown reason') @@ -469,7 +438,12 @@ class YoutubeIE(InfoExtractor): video_title = compat_urllib_parse.unquote_plus(video_info['title'][0]) # thumbnail image - if 'thumbnail_url' not in video_info: + # We try first to get a high quality image: + m_thumb = re.search(r'', + video_webpage, re.DOTALL) + if m_thumb is not None: + video_thumbnail = m_thumb.group(1) + elif 'thumbnail_url' not in video_info: self._downloader.report_warning(u'unable to extract video thumbnail') video_thumbnail = '' else: # don't panic if we can't find it @@ -496,25 +470,10 @@ class YoutubeIE(InfoExtractor): # subtitles video_subtitles = None - if self._downloader.params.get('writesubtitles', False): - video_subtitles = self._extract_subtitle(video_id) - if video_subtitles: - (sub_error, sub_lang, sub) = video_subtitles[0] - if sub_error: - self._downloader.report_warning(sub_error) - - if self._downloader.params.get('writeautomaticsub', False): + if self._downloader.params.get('writesubtitles', False) or self._downloader.params.get('allsubtitles', False): + video_subtitles = self._extract_subtitles(video_id) + elif self._downloader.params.get('writeautomaticsub', False): video_subtitles = self._request_automatic_caption(video_id, video_webpage) - (sub_error, sub_lang, sub) = video_subtitles[0] - if sub_error: - self._downloader.report_warning(sub_error) - - if self._downloader.params.get('allsubtitles', False): - video_subtitles = self._extract_all_subtitles(video_id) - for video_subtitle in video_subtitles: - (sub_error, sub_lang, sub) = video_subtitle - if sub_error: - self._downloader.report_warning(sub_error) if self._downloader.params.get('listsubtitles', False): self._list_available_subtitles(video_id) @@ -548,6 +507,8 @@ class YoutubeIE(InfoExtractor): self.report_rtmp_download() video_url_list = [(None, video_info['conn'][0])] elif 'url_encoded_fmt_stream_map' in video_info and len(video_info['url_encoded_fmt_stream_map']) >= 1: + if 'rtmpe%3Dyes' in video_info['url_encoded_fmt_stream_map'][0]: + raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True) url_map = {} for url_data_str in video_info['url_encoded_fmt_stream_map'][0].split(','): url_data = compat_parse_qs(url_data_str) @@ -558,10 +519,17 @@ class YoutubeIE(InfoExtractor): elif 's' in url_data: if self._downloader.params.get('verbose'): s = url_data['s'][0] - player = self._search_regex(r'html5player-(.+?)\.js', video_webpage, - 'html5 player', fatal=False) - self.to_screen('encrypted signature length %d (%d.%d), itag %s, html5 player %s' % - (len(s), len(s.split('.')[0]), len(s.split('.')[1]), url_data['itag'][0], player)) + if age_gate: + player_version = self._search_regex(r'ad3-(.+?)\.swf', + video_info['ad3_module'][0], 'flash player', + fatal=False) + player = 'flash player %s' % player_version + else: + player = u'html5 player %s' % self._search_regex(r'html5player-(.+?)\.js', video_webpage, + 'html5 player', fatal=False) + parts_sizes = u'.'.join(compat_str(len(part)) for part in s.split('.')) + self.to_screen(u'encrypted signature length %d (%s), itag %s, %s' % + (len(s), parts_sizes, url_data['itag'][0], player)) signature = self._decrypt_signature(url_data['s'][0]) url += '&signature=' + signature if 'ratebypass' not in url: @@ -583,7 +551,7 @@ class YoutubeIE(InfoExtractor): if req_format is None or req_format == 'best': video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality elif req_format == 'worst': - video_url_list = [(existing_formats[len(existing_formats)-1], url_map[existing_formats[len(existing_formats)-1]])] # worst quality + video_url_list = [(existing_formats[-1], url_map[existing_formats[-1]])] # worst quality elif req_format in ('-1', 'all'): video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats else: @@ -626,8 +594,7 @@ class YoutubeIE(InfoExtractor): return results class YoutubePlaylistIE(InfoExtractor): - """Information Extractor for YouTube playlists.""" - + IE_DESC = u'YouTube.com playlists' _VALID_URL = r"""(?: (?:https?://)? (?:\w+\.)? @@ -689,13 +656,12 @@ class YoutubePlaylistIE(InfoExtractor): videos = [v[1] for v in sorted(videos)] - url_results = [self.url_result(url, 'Youtube') for url in videos] + url_results = [self.url_result(vurl, 'Youtube') for vurl in videos] return [self.playlist_result(url_results, playlist_id, playlist_title)] class YoutubeChannelIE(InfoExtractor): - """Information Extractor for YouTube channels.""" - + IE_DESC = u'YouTube.com channels' _VALID_URL = r"^(?:https?://)?(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/([0-9A-Za-z_-]+)" _TEMPLATE_URL = 'http://www.youtube.com/channel/%s/videos?sort=da&flow=list&view=0&page=%s&gl=US&hl=en' _MORE_PAGES_INDICATOR = 'yt-uix-load-more' @@ -748,13 +714,12 @@ class YoutubeChannelIE(InfoExtractor): self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids))) urls = ['http://www.youtube.com/watch?v=%s' % id for id in video_ids] - url_entries = [self.url_result(url, 'Youtube') for url in urls] + url_entries = [self.url_result(eurl, 'Youtube') for eurl in urls] return [self.playlist_result(url_entries, channel_id)] class YoutubeUserIE(InfoExtractor): - """Information Extractor for YouTube users.""" - + IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)' _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/user/)|ytuser:)([A-Za-z0-9_-]+)' _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s' _GDATA_PAGE_SIZE = 50 @@ -806,11 +771,11 @@ class YoutubeUserIE(InfoExtractor): pagenum += 1 urls = ['http://www.youtube.com/watch?v=%s' % video_id for video_id in video_ids] - url_results = [self.url_result(url, 'Youtube') for url in urls] + url_results = [self.url_result(rurl, 'Youtube') for rurl in urls] return [self.playlist_result(url_results, playlist_title = username)] class YoutubeSearchIE(SearchInfoExtractor): - """Information Extractor for YouTube search queries.""" + IE_DESC = u'YouTube.com searches' _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc' _MAX_RESULTS = 1000 IE_NAME = u'youtube:search' @@ -850,3 +815,55 @@ class YoutubeSearchIE(SearchInfoExtractor): video_ids = video_ids[:n] videos = [self.url_result('http://www.youtube.com/watch?v=%s' % id, 'Youtube') for id in video_ids] return self.playlist_result(videos, query) + + +class YoutubeShowIE(InfoExtractor): + IE_DESC = u'YouTube.com (multi-season) shows' + _VALID_URL = r'https?://www\.youtube\.com/show/(.*)' + IE_NAME = u'youtube:show' + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + show_name = mobj.group(1) + webpage = self._download_webpage(url, show_name, u'Downloading show webpage') + # There's one playlist for each season of the show + m_seasons = list(re.finditer(r'href="(/playlist\?list=.*?)"', webpage)) + self.to_screen(u'%s: Found %s seasons' % (show_name, len(m_seasons))) + return [self.url_result('https://www.youtube.com' + season.group(1), 'YoutubePlaylist') for season in m_seasons] + + +class YoutubeSubscriptionsIE(YoutubeIE): + """It's a subclass of YoutubeIE because we need to login""" + IE_DESC = u'YouTube.com subscriptions feed, "ytsubs" keyword(requires authentication)' + _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?' + IE_NAME = u'youtube:subscriptions' + _FEED_TEMPLATE = 'http://www.youtube.com/feed_ajax?action_load_system_feed=1&feed_name=subscriptions&paging=%s' + _PAGING_STEP = 30 + + # Overwrite YoutubeIE properties we don't want + _TESTS = [] + @classmethod + def suitable(cls, url): + return re.match(cls._VALID_URL, url) is not None + + def _real_initialize(self): + (username, password) = self._get_login_info() + if username is None: + raise ExtractorError(u'No login info available, needed for downloading the Youtube subscriptions.', expected=True) + super(YoutubeSubscriptionsIE, self)._real_initialize() + + def _real_extract(self, url): + feed_entries = [] + # The step argument is available only in 2.7 or higher + for i in itertools.count(0): + paging = i*self._PAGING_STEP + info = self._download_webpage(self._FEED_TEMPLATE % paging, 'feed', + u'Downloading page %s' % i) + info = json.loads(info) + feed_html = info['feed_html'] + m_ids = re.finditer(r'"/watch\?v=(.*?)"', feed_html) + ids = orderedSet(m.group(1) for m in m_ids) + feed_entries.extend(self.url_result(id, 'Youtube') for id in ids) + if info['paging'] is None: + break + return self.playlist_result(feed_entries, playlist_title='Youtube Subscriptions')