X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fyoutube.py;h=98cac7c17c01ccce217cd09cb7b72437913d756a;hb=5f6a1245ffa9276c1af59b0835afeef67e2fb5b1;hp=4ab56e0ac6baf7f59f1c8892b5dbe560d96cb195;hpb=8d81f872fb9844ac640c84b8937d3a53b729d1aa;p=youtube-dl diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 4ab56e0ac..98cac7c17 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -33,6 +33,7 @@ from ..utils import ( uppercase_escape, ) + class YoutubeBaseInfoExtractor(InfoExtractor): """Provide base functions for Youtube extractors""" _LOGIN_URL = 'https://accounts.google.com/ServiceLogin' @@ -99,7 +100,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor): # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode # chokes on unicode - login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k,v in login_form_strs.items()) + login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k, v in login_form_strs.items()) login_data = compat_urllib_parse.urlencode(login_form).encode('ascii') req = compat_urllib_request.Request(self._LOGIN_URL, login_data) @@ -149,7 +150,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor): 'service': 'youtube', 'hl': 'en_US', } - tfa_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k,v in tfa_form_strs.items()) + tfa_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k, v in tfa_form_strs.items()) tfa_data = compat_urllib_parse.urlencode(tfa_form).encode('ascii') tfa_req = compat_urllib_request.Request(self._TWOFACTOR_URL, tfa_data) @@ -185,8 +186,8 @@ class YoutubeBaseInfoExtractor(InfoExtractor): self._download_webpage( req, None, - note='Confirming age', errnote='Unable to confirm age') - return True + note='Confirming age', errnote='Unable to confirm age', + fatal=False) def _real_initialize(self): if self._downloader is None: @@ -274,6 +275,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): '138': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, + '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'h264'}, + '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'h264'}, + '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'vcodec': 'h264'}, # Dash mp4 audio '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 48, 'preference': -50}, @@ -297,11 +301,18 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, + '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'VP9'}, + '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'VP9'}, # Dash webm audio '171': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 128, 'preference': -50}, '172': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 256, 'preference': -50}, + # Dash webm audio with opus inside + '249': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50, 'preference': -50}, + '250': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70, 'preference': -50}, + '251': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160, 'preference': -50}, + # RTMP (unnamed) '_rtmp': {'protocol': 'rtmp'}, } @@ -396,6 +407,19 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): 'format': '141', }, }, + # Controversy video + { + 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8', + 'info_dict': { + 'id': 'T4XJQO3qol8', + 'ext': 'mp4', + 'upload_date': '20100909', + 'uploader': 'The Amazing Atheist', + 'uploader_id': 'TheAmazingAtheist', + 'title': 'Burning Everyone\'s Koran', + 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html', + } + } ] def __init__(self, *args, **kwargs): @@ -505,7 +529,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): def _parse_sig_js(self, jscode): funcname = self._search_regex( - r'signature=([$a-zA-Z]+)', jscode, + r'\.sig\|\|([a-zA-Z0-9]+)\(', jscode, 'Initial JS player signature function name') jsi = JSInterpreter(jscode) @@ -595,7 +619,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): list_url = caption_url + '&' + list_params caption_list = self._download_xml(list_url, video_id) original_lang_node = caption_list.find('track') - if original_lang_node is None or original_lang_node.attrib.get('kind') != 'asr' : + if original_lang_node is None or original_lang_node.attrib.get('kind') != 'asr': self._downloader.report_warning('Video doesn\'t have automatic captions') return {} original_lang = original_lang_node.attrib['lang_code'] @@ -628,6 +652,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): def _extract_from_m3u8(self, manifest_url, video_id): url_map = {} + def _get_urls(_manifest): lines = _manifest.split('\n') urls = filter(lambda l: l and not l.startswith('#'), @@ -656,7 +681,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): video_id = self.extract_id(url) # Get video webpage - url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id + url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id pref_cookies = [ c for c in self._downloader.cookiejar if c.domain == '.youtube.com' and c.name == 'PREF'] @@ -679,7 +704,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): # Get video info self.report_video_info_webpage_download(video_id) if re.search(r'player-age-gate-content">', video_webpage) is not None: - self.report_age_confirmation() age_gate = True # We simulate the access to the video from www.youtube.com/v/{video_id} # this can be viewed without login into Youtube @@ -687,12 +711,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): 'video_id': video_id, 'eurl': 'https://youtube.googleapis.com/v/' + video_id, 'sts': self._search_regex( - r'"sts"\s*:\s*(\d+)', video_webpage, 'sts'), + r'"sts"\s*:\s*(\d+)', video_webpage, 'sts', default=''), }) video_info_url = proto + '://www.youtube.com/get_video_info?' + data - video_info_webpage = self._download_webpage(video_info_url, video_id, - note=False, - errnote='unable to download video info webpage') + video_info_webpage = self._download_webpage( + video_info_url, video_id, + note='Refetching age-gated info webpage', + errnote='unable to download video info webpage') video_info = compat_parse_qs(video_info_webpage) else: age_gate = False @@ -827,7 +852,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): # annotations video_annotations = None if self._downloader.params.get('writeannotations', False): - video_annotations = self._extract_annotations(video_id) + video_annotations = self._extract_annotations(video_id) # Decide which formats to download try: @@ -877,7 +902,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): 'player_url': player_url, }] elif len(video_info.get('url_encoded_fmt_stream_map', [])) >= 1 or len(video_info.get('adaptive_fmts', [])) >= 1: - encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts',[''])[0] + encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0] if 'rtmpe%3Dyes' in encoded_url_map: raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True) url_map = {} @@ -951,6 +976,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): dash_manifest_url = video_info.get('dashmpd')[0] else: dash_manifest_url = ytplayer_config['args']['dashmpd'] + def decrypt_sig(mobj): s = mobj.group(1) dec_s = self._decrypt_signature(s, video_id, player_url, age_gate) @@ -986,7 +1012,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): existing_format.update(f) except (ExtractorError, KeyError) as e: - self.report_warning('Skipping DASH manifest: %s' % e, video_id) + self.report_warning('Skipping DASH manifest: %r' % e, video_id) self._sort_formats(formats) @@ -1010,6 +1036,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): 'formats': formats, } + class YoutubePlaylistIE(YoutubeBaseInfoExtractor): IE_DESC = 'YouTube.com playlists' _VALID_URL = r"""(?x)(?: @@ -1023,7 +1050,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor): ) ( (?:PL|LL|EC|UU|FL|RD)?[0-9A-Za-z-_]{10,} - # Top tracks, they can also include dots + # Top tracks, they can also include dots |(?:MC)[\w\.]* ) .* @@ -1038,6 +1065,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor): 'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re', 'info_dict': { 'title': 'ytdl test PL', + 'id': 'PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re', }, 'playlist_count': 3, }, { @@ -1057,7 +1085,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor): 'note': 'issue #673', 'url': 'PLBB231211A4F62143', 'info_dict': { - 'title': 'Team Fortress 2 (Class-based LP)', + 'title': '[OLD]Team Fortress 2 (Class-based LP)', }, 'playlist_mincount': 26, }, { @@ -1206,7 +1234,7 @@ class YoutubeTopListIE(YoutubePlaylistIE): ]*>.*?%s.*?''' % re.escape(query), channel_page, 'list') url = compat_urlparse.urljoin('https://www.youtube.com/', link) - + video_re = r'data-index="\d+".*?data-video-id="([0-9A-Za-z_-]{11})"' ids = [] # sometimes the webpage doesn't contain the videos @@ -1274,7 +1302,7 @@ class YoutubeChannelIE(InfoExtractor): ids_in_page = self.extract_videos_from_page(page['content_html']) video_ids.extend(ids_in_page) - + if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']: break @@ -1309,8 +1337,10 @@ class YoutubeUserIE(InfoExtractor): # Don't return True if the url can be extracted with other youtube # extractor, the regex would is too permissive and it would match. other_ies = iter(klass for (name, klass) in globals().items() if name.endswith('IE') and klass is not cls) - if any(ie.suitable(url) for ie in other_ies): return False - else: return super(YoutubeUserIE, cls).suitable(url) + if any(ie.suitable(url) for ie in other_ies): + return False + else: + return super(YoutubeUserIE, cls).suitable(url) def _real_extract(self, url): # Extract username @@ -1533,12 +1563,14 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor): paging = mobj.group('paging') return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE) + class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor): IE_DESC = 'YouTube.com recommended videos, "ytrec" keyword (requires authentication)' _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?' _FEED_NAME = 'recommended' _PLAYLIST_TITLE = 'Youtube Recommended videos' + class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor): IE_DESC = 'Youtube watch later list, "ytwatchlater" keyword (requires authentication)' _VALID_URL = r'https?://www\.youtube\.com/feed/watch_later|:ytwatchlater' @@ -1546,6 +1578,7 @@ class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor): _PLAYLIST_TITLE = 'Youtube Watch Later' _PERSONAL_FEED = True + class YoutubeHistoryIE(YoutubeFeedsInfoExtractor): IE_DESC = 'Youtube watch history, "ythistory" keyword (requires authentication)' _VALID_URL = 'https?://www\.youtube\.com/feed/history|:ythistory' @@ -1553,6 +1586,7 @@ class YoutubeHistoryIE(YoutubeFeedsInfoExtractor): _PERSONAL_FEED = True _PLAYLIST_TITLE = 'Youtube Watch History' + class YoutubeFavouritesIE(YoutubeBaseInfoExtractor): IE_NAME = 'youtube:favorites' IE_DESC = 'YouTube.com favourite videos, "ytfav" keyword (requires authentication)'