Merge pull request #1409 from JohnyMoSwag/master (closes #1404)
[youtube-dl] / youtube_dl / extractor / subtitles.py
1 from .common import InfoExtractor
2
3 from ..utils import (
4     compat_str,
5     ExtractorError,
6 )
7
8
class SubtitlesInfoExtractor(InfoExtractor):
    """
    Base class for extractors that can list and download subtitles.

    Subclasses provide the language -> URL mappings through
    _get_available_subtitles and (optionally)
    _get_available_automatic_caption; this class picks the languages
    according to the downloader parameters and fetches their contents.
    """

    @property
    def _have_to_download_any_subtitles(self):
        """ True if any of the subtitle related download options is set """
        params = self._downloader.params
        return any([params.get('writesubtitles', False),
                    params.get('writeautomaticsub'),
                    params.get('allsubtitles', False)])

    def _list_available_subtitles(self, video_id, webpage=None):
        """ outputs the available subtitles for the video """
        # The hooks are documented to return a dict, but an override that
        # falls through returns None; treat that as "nothing available"
        # instead of crashing on None.keys().
        sub_lang_list = self._get_available_subtitles(video_id) or {}
        auto_captions_list = self._get_available_automatic_caption(video_id, webpage) or {}
        sub_lang = ",".join(sub_lang_list.keys())
        self.to_screen(u'%s: Available subtitles for video: %s' %
                       (video_id, sub_lang))
        auto_lang = ",".join(auto_captions_list.keys())
        self.to_screen(u'%s: Available automatic captions for video: %s' %
                       (video_id, auto_lang))

    def extract_subtitles(self, video_id, video_webpage=None):
        """
        returns {sub_lang: sub} ,{} if subtitles not found or None if the
        subtitles aren't requested.

        Language selection, in order of precedence:
          * 'allsubtitles': every available language
          * 'subtitleslangs': the listed languages (a warning is reported
            for each one that is not available)
          * otherwise 'en' if available, else the first available language
        """
        if not self._have_to_download_any_subtitles:
            return None

        params = self._downloader.params
        available_subs_list = {}
        # Automatic captions are merged first so that a regular subtitle
        # track in the same language takes precedence over them.
        if params.get('writeautomaticsub', False):
            available_subs_list.update(
                self._get_available_automatic_caption(video_id, video_webpage) or {})
        if params.get('writesubtitles', False) or params.get('allsubtitles', False):
            # "or {}" guards against a hook returning None, which would
            # otherwise make dict.update raise TypeError.
            available_subs_list.update(
                self._get_available_subtitles(video_id) or {})

        if not available_subs_list:  # error, it didn't get the available subtitles
            return {}

        if params.get('allsubtitles', False):
            sub_lang_list = available_subs_list
        else:
            requested_langs = params.get('subtitleslangs')
            if not requested_langs:
                if 'en' in available_subs_list:
                    requested_langs = ['en']
                else:
                    # No preference and no English: fall back to the first
                    # language the mapping yields.
                    requested_langs = [next(iter(available_subs_list))]

            sub_lang_list = {}
            for sub_lang in requested_langs:
                if sub_lang not in available_subs_list:
                    self._downloader.report_warning(u'no closed captions found in the specified language "%s"' % sub_lang)
                    continue
                sub_lang_list[sub_lang] = available_subs_list[sub_lang]

        subtitles = {}
        for sub_lang, url in sub_lang_list.items():
            subtitle = self._request_subtitle_url(sub_lang, url)
            # Languages whose download failed or came back empty are skipped.
            if subtitle:
                subtitles[sub_lang] = subtitle
        return subtitles

    def _request_subtitle_url(self, sub_lang, url):
        """
        makes the http request for the subtitle

        returns the downloaded content, or None (after reporting a warning)
        if the download failed or the content is empty
        """
        try:
            sub = self._download_webpage(url, None, note=False)
        except ExtractorError as err:
            self._downloader.report_warning(u'unable to download video subtitles for %s: %s' % (sub_lang, compat_str(err)))
            return None
        if not sub:
            self._downloader.report_warning(u'Did not fetch video subtitles')
            return None
        return sub

    def _get_available_subtitles(self, video_id):
        """
        returns {sub_lang: url} or {} if not available
        Must be redefined by the subclasses
        """
        # Return an empty mapping (not None) so the default honours the
        # documented contract.
        return {}

    def _get_available_automatic_caption(self, video_id, webpage):
        """
        returns {sub_lang: url} or {} if not available
        Must be redefined by the subclasses that support automatic captions,
        otherwise it will return {}
        """
        self._downloader.report_warning(u'Automatic Captions not supported by this server')
        return {}