From: Ismaël Mejía Date: Sat, 2 Nov 2013 17:01:05 +0000 (+0100) Subject: [subtitles] refactor to support websites with subtitle information in the X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=commitdiff_plain;h=1f343eaabbb9e0daf67363b7737833cf5e2a3e16;p=youtube-dl [subtitles] refactor to support websites with subtitle information in the webpage. I added the parameter webpage, so now it's similar to the way automatic captions are handled. This is an improvement needed for websites like TED. --- diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py index 7d8353946..3aef82bcf 100644 --- a/youtube_dl/extractor/dailymotion.py +++ b/youtube_dl/extractor/dailymotion.py @@ -113,9 +113,9 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor): video_url = info[max_quality] # subtitles - video_subtitles = self.extract_subtitles(video_id) + video_subtitles = self.extract_subtitles(video_id, webpage) if self._downloader.params.get('listsubtitles', False): - self._list_available_subtitles(video_id) + self._list_available_subtitles(video_id, webpage) return return [{ @@ -129,7 +129,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor): 'thumbnail': info['thumbnail_url'] }] - def _get_available_subtitles(self, video_id): + def _get_available_subtitles(self, video_id, webpage): try: sub_list = self._download_webpage( 'https://api.dailymotion.com/video/%s/subtitles?fields=id,language,url' % video_id, diff --git a/youtube_dl/extractor/subtitles.py b/youtube_dl/extractor/subtitles.py index 90de7de3a..4b4c5235d 100644 --- a/youtube_dl/extractor/subtitles.py +++ b/youtube_dl/extractor/subtitles.py @@ -12,9 +12,9 @@ class SubtitlesInfoExtractor(InfoExtractor): return any([self._downloader.params.get('writesubtitles', False), self._downloader.params.get('writeautomaticsub')]) - def _list_available_subtitles(self, video_id, webpage=None): + def _list_available_subtitles(self, video_id, webpage): """ outputs the 
available subtitles for the video """ - sub_lang_list = self._get_available_subtitles(video_id) + sub_lang_list = self._get_available_subtitles(video_id, webpage) auto_captions_list = self._get_available_automatic_caption(video_id, webpage) sub_lang = ",".join(list(sub_lang_list.keys())) self.to_screen(u'%s: Available subtitles for video: %s' % @@ -23,7 +23,7 @@ class SubtitlesInfoExtractor(InfoExtractor): self.to_screen(u'%s: Available automatic captions for video: %s' % (video_id, auto_lang)) - def extract_subtitles(self, video_id, video_webpage=None): + def extract_subtitles(self, video_id, webpage): """ returns {sub_lang: sub} ,{} if subtitles not found or None if the subtitles aren't requested. @@ -32,9 +32,9 @@ class SubtitlesInfoExtractor(InfoExtractor): return None available_subs_list = {} if self._downloader.params.get('writeautomaticsub', False): - available_subs_list.update(self._get_available_automatic_caption(video_id, video_webpage)) + available_subs_list.update(self._get_available_automatic_caption(video_id, webpage)) if self._downloader.params.get('writesubtitles', False): - available_subs_list.update(self._get_available_subtitles(video_id)) + available_subs_list.update(self._get_available_subtitles(video_id, webpage)) if not available_subs_list: # error, it didn't get the available subtitles return {} @@ -74,7 +74,7 @@ class SubtitlesInfoExtractor(InfoExtractor): return return sub - def _get_available_subtitles(self, video_id): + def _get_available_subtitles(self, video_id, webpage): """ returns {sub_lang: url} or {} if not available Must be redefined by the subclasses diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 4347651d7..d7c9b38f9 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1099,7 +1099,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): else: raise ExtractorError(u'Unable to decrypt signature, key length %d not supported; retrying might work' 
% (len(s))) - def _get_available_subtitles(self, video_id): + def _get_available_subtitles(self, video_id, webpage): try: sub_list = self._download_webpage( 'http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,