[airmozilla] Add new extractor
[youtube-dl] / youtube_dl / extractor / subtitles.py
1 from __future__ import unicode_literals
2 from .common import InfoExtractor
3
4 from ..compat import compat_str
5 from ..utils import (
6     ExtractorError,
7 )
8
9
class SubtitlesInfoExtractor(InfoExtractor):
    """
    Extractor base class with helpers for listing and downloading subtitles.

    Subclasses describe the available tracks as {sub_lang: url} dictionaries
    through _get_available_subtitles (and _get_available_automatic_caption
    when automatic captions are supported); extract_subtitles then picks the
    languages according to the downloader params and fetches them.
    """

    @property
    def _have_to_download_any_subtitles(self):
        """ True if the 'writesubtitles' or 'writeautomaticsub' param is set """
        params = self._downloader.params
        return any((params.get('writesubtitles', False),
                    params.get('writeautomaticsub')))

    def _list_available_subtitles(self, video_id, webpage):
        """ outputs the available subtitles for the video """
        # Both lookups are done before anything is printed.
        sub_lang_list = self._get_available_subtitles(video_id, webpage)
        auto_captions_list = self._get_available_automatic_caption(video_id, webpage)
        # Iterating a dict yields its keys, i.e. the language codes.
        self.to_screen('%s: Available subtitles for video: %s' %
                       (video_id, ','.join(sub_lang_list)))
        self.to_screen('%s: Available automatic captions for video: %s' %
                       (video_id, ','.join(auto_captions_list)))

    def extract_subtitles(self, video_id, webpage):
        """
        returns {sub_lang: sub} ,{} if subtitles not found or None if the
        subtitles aren't requested.

        Language selection: every available language if the 'allsubtitles'
        param is set; otherwise the languages in the 'subtitleslangs' param,
        falling back to 'en' if available, else to the first available
        language. Requested languages that are not available only produce a
        warning, as do subtitles that fail to download.
        """
        if not self._have_to_download_any_subtitles:
            return None

        params = self._downloader.params
        available_subs_list = {}
        if params.get('writeautomaticsub', False):
            available_subs_list.update(self._get_available_automatic_caption(video_id, webpage))
        # Regular subtitles are merged last, so for a language present in
        # both dictionaries the regular subtitle url wins.
        if params.get('writesubtitles', False):
            available_subs_list.update(self._get_available_subtitles(video_id, webpage))

        if not available_subs_list:  # error, it didn't get the available subtitles
            return {}

        if params.get('allsubtitles', False):
            sub_lang_list = available_subs_list
        else:
            requested_langs = params.get('subtitleslangs')
            if not requested_langs:
                if 'en' in available_subs_list:
                    requested_langs = ['en']
                else:
                    # No preference and no English: take the first language
                    # the dictionary yields.
                    requested_langs = [next(iter(available_subs_list))]

            sub_lang_list = {}
            for sub_lang in requested_langs:
                if sub_lang not in available_subs_list:
                    self._downloader.report_warning('no closed captions found in the specified language "%s"' % sub_lang)
                    continue
                sub_lang_list[sub_lang] = available_subs_list[sub_lang]

        subtitles = {}
        for sub_lang, url in sub_lang_list.items():
            subtitle = self._request_subtitle_url(sub_lang, url)
            # Failed or empty downloads are left out of the result.
            if subtitle:
                subtitles[sub_lang] = subtitle
        return subtitles

    def _download_subtitle_url(self, sub_lang, url):
        """
        downloads url with _download_webpage and returns the result
        (sub_lang is not used by this default implementation)
        """
        return self._download_webpage(url, None, note=False)

    def _request_subtitle_url(self, sub_lang, url):
        """
        makes the http request for the subtitle

        returns the downloaded subtitle, or None (after reporting a warning)
        if the download raised ExtractorError or returned nothing
        """
        try:
            sub = self._download_subtitle_url(sub_lang, url)
        except ExtractorError as err:
            self._downloader.report_warning('unable to download video subtitles for %s: %s' % (sub_lang, compat_str(err)))
            return
        if not sub:
            self._downloader.report_warning('Did not fetch video subtitles')
            return
        return sub

    def _get_available_subtitles(self, video_id, webpage):
        """
        returns {sub_lang: url} or {} if not available
        Must be redefined by the subclasses

        raises NotImplementedError if it is not redefined and webpage is not
        already a dictionary
        """

        # By default, allow implementations to simply pass in the result.
        # This is an explicit raise rather than an assert so that the check
        # is still performed when Python runs with -O.
        if not isinstance(webpage, dict):
            raise NotImplementedError('_get_available_subtitles not implemented')
        return webpage

    def _get_available_automatic_caption(self, video_id, webpage):
        """
        returns {sub_lang: url} or {} if not available
        Must be redefined by the subclasses that support automatic captions,
        otherwise it will return {}
        """
        self._downloader.report_warning('Automatic Captions not supported by this server')
        return {}