# git.bitcoin.ninja Git - youtube-dl/blob - youtube_dl/extractor/subtitles.py
# Merge branch 'ustream-embed-recorded2' of https://github.com/anovicecodemonkey/youtub...
# [youtube-dl] / youtube_dl / extractor / subtitles.py
1 from .common import InfoExtractor
2
3 from ..utils import (
4     compat_str,
5     ExtractorError,
6 )
7
8
class SubtitlesInfoExtractor(InfoExtractor):
    """
    InfoExtractor base class with shared subtitle handling.

    Subclasses supply the {sub_lang: url} mappings through
    _get_available_subtitles and _get_available_automatic_caption; this class
    selects the languages to fetch from the downloader params
    ('writesubtitles', 'writeautomaticsub', 'allsubtitles', 'subtitleslangs')
    and downloads them.
    """

    @property
    def _have_to_download_any_subtitles(self):
        """ True if 'writesubtitles' or 'writeautomaticsub' is enabled """
        params = self._downloader.params
        return any([params.get('writesubtitles', False),
                    params.get('writeautomaticsub')])

    def _list_available_subtitles(self, video_id, webpage):
        """ outputs the available subtitles for the video """
        sub_lang_list = self._get_available_subtitles(video_id, webpage)
        auto_captions_list = self._get_available_automatic_caption(video_id, webpage)
        sub_lang = ",".join(sub_lang_list.keys())
        self.to_screen(u'%s: Available subtitles for video: %s' %
                       (video_id, sub_lang))
        auto_lang = ",".join(auto_captions_list.keys())
        self.to_screen(u'%s: Available automatic captions for video: %s' %
                       (video_id, auto_lang))

    def extract_subtitles(self, video_id, webpage):
        """
        returns {sub_lang: sub} ,{} if subtitles not found or None if the
        subtitles aren't requested.

        Language selection: every available language when 'allsubtitles' is
        set; otherwise the languages in 'subtitleslangs'; otherwise 'en' if
        available, else the first available language. Requested languages
        that are not available are reported with a warning and skipped, as
        are languages whose download yields nothing.
        """
        if not self._have_to_download_any_subtitles:
            return None

        params = self._downloader.params
        available_subs_list = {}
        # Regular subtitles are merged last, so they take precedence over an
        # automatic caption offered for the same language.
        if params.get('writeautomaticsub', False):
            available_subs_list.update(self._get_available_automatic_caption(video_id, webpage))
        if params.get('writesubtitles', False):
            available_subs_list.update(self._get_available_subtitles(video_id, webpage))

        if not available_subs_list:  # error, it didn't get the available subtitles
            return {}

        if params.get('allsubtitles', False):
            sub_lang_list = available_subs_list
        else:
            requested_langs = params.get('subtitleslangs')
            if not requested_langs:
                if 'en' in available_subs_list:
                    requested_langs = ['en']
                else:
                    # available_subs_list is known to be non-empty here
                    requested_langs = [next(iter(available_subs_list))]

            sub_lang_list = {}
            for sub_lang in requested_langs:
                if sub_lang not in available_subs_list:
                    self._downloader.report_warning(u'no closed captions found in the specified language "%s"' % sub_lang)
                    continue
                sub_lang_list[sub_lang] = available_subs_list[sub_lang]

        subtitles = {}
        for sub_lang, url in sub_lang_list.items():
            subtitle = self._request_subtitle_url(sub_lang, url)
            if subtitle:
                subtitles[sub_lang] = subtitle
        return subtitles

    def _download_subtitle_url(self, sub_lang, url):
        """
        downloads the subtitle at url via _download_webpage and returns its
        result; sub_lang is not used by this default implementation
        """
        return self._download_webpage(url, None, note=False)

    def _request_subtitle_url(self, sub_lang, url):
        """
        makes the http request for the subtitle

        returns the downloaded subtitle, or None (after reporting a warning)
        if the download raised ExtractorError or produced an empty result
        """
        try:
            sub = self._download_subtitle_url(sub_lang, url)
        except ExtractorError as err:
            self._downloader.report_warning(u'unable to download video subtitles for %s: %s' % (sub_lang, compat_str(err)))
            return None
        if not sub:
            self._downloader.report_warning(u'Did not fetch video subtitles')
            return None
        return sub

    def _get_available_subtitles(self, video_id, webpage):
        """
        returns {sub_lang: url} or {} if not available
        Must be redefined by the subclasses

        raises AssertionError if it is not redefined and webpage is not
        already a dict
        """

        # By default, allow implementations to simply pass in the result.
        # The check is raised explicitly instead of using an assert statement
        # so it is not stripped under "python -O"; AssertionError and its
        # message are kept so the failure looks the same as before.
        if not isinstance(webpage, dict):
            raise AssertionError('_get_available_subtitles not implemented')
        return webpage

    def _get_available_automatic_caption(self, video_id, webpage):
        """
        returns {sub_lang: url} or {} if not available
        Must be redefined by the subclasses that support automatic captions,
        otherwise it will return {}
        """
        self._downloader.report_warning(u'Automatic Captions not supported by this server')
        return {}