Allow to specify multiple subtitles languages separated by commas (closes #518)
authorJaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>
Fri, 23 Aug 2013 16:34:57 +0000 (18:34 +0200)
committerJaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>
Fri, 23 Aug 2013 16:34:57 +0000 (18:34 +0200)
test/test_youtube_subtitles.py
youtube_dl/YoutubeDL.py
youtube_dl/__init__.py
youtube_dl/extractor/youtube.py

index fe0eac6804767f3cc2b355912c1e4179b310be10..641206277bbeec22d11339bb3b143df5adff834e 100644 (file)
@@ -40,7 +40,7 @@ class TestYoutubeSubtitles(unittest.TestCase):
     def test_youtube_subtitles_it(self):
         DL = FakeYDL()
         DL.params['writesubtitles'] = True
-        DL.params['subtitleslang'] = 'it'
+        DL.params['subtitleslangs'] = ['it']
         IE = YoutubeIE(DL)
         info_dict = IE.extract('QRS8MkLhQmM')
         sub = info_dict[0]['subtitles']['it']
@@ -85,11 +85,20 @@ class TestYoutubeSubtitles(unittest.TestCase):
     def test_youtube_automatic_captions(self):
         DL = FakeYDL()
         DL.params['writeautomaticsub'] = True
-        DL.params['subtitleslang'] = 'it'
+        DL.params['subtitleslangs'] = ['it']
         IE = YoutubeIE(DL)
         info_dict = IE.extract('8YoUxe5ncPo')
         sub = info_dict[0]['subtitles']['it']
         self.assertTrue(sub is not None)
+    def test_youtube_multiple_langs(self):
+        DL = FakeYDL()
+        DL.params['writesubtitles'] = True
+        langs = ['it', 'fr', 'de']
+        DL.params['subtitleslangs'] = langs
+        IE = YoutubeIE(DL)
+        subtitles = IE.extract('QRS8MkLhQmM')[0]['subtitles']
+        for lang in langs:
+            self.assertTrue(subtitles.get(lang) is not None, u'Subtitles for \'%s\' not extracted' % lang)
 
 if __name__ == '__main__':
     unittest.main()
index 1fd610a6e7b2285a078fb18f1d059154c9a540c0..3fc4ec378d2334f35a74949e21cfde592a2e64c7 100644 (file)
@@ -76,7 +76,7 @@ class YoutubeDL(object):
     allsubtitles:      Downloads all the subtitles of the video
     listsubtitles:     Lists all available subtitles for the video
     subtitlesformat:   Subtitle format [srt/sbv/vtt] (default=srt)
-    subtitleslang:     Language of the subtitles to download
+    subtitleslangs:    List of languages of the subtitles to download
     keepvideo:         Keep the video file after post-processing
     daterange:         A DateRange object, download only if the upload_date is in the range.
     skip_download:     Skip the actual download of the video file
index 441ca6b6a74ed4cc2882c4c84fbdf457cd2b7da2..61442907359a76f277c4ccea4ffd5ddb64bdb8fb 100644 (file)
@@ -83,6 +83,9 @@ def parseOpts(overrideArguments=None):
 
         return "".join(opts)
 
+    def _comma_separated_values_options_callback(option, opt_str, value, parser):
+        setattr(parser.values, option.dest, value.split(','))
+
     def _find_term_columns():
         columns = os.environ.get('COLUMNS', None)
         if columns:
@@ -206,9 +209,10 @@ def parseOpts(overrideArguments=None):
     subtitles.add_option('--sub-format',
             action='store', dest='subtitlesformat', metavar='FORMAT',
             help='subtitle format (default=srt) ([sbv/vtt] youtube only)', default='srt')
-    subtitles.add_option('--sub-lang', '--srt-lang',
-            action='store', dest='subtitleslang', metavar='LANG',
-            help='language of the subtitles to download (optional) use IETF language tags like \'en\'')
+    subtitles.add_option('--sub-lang', '--sub-langs', '--srt-lang',
+            action='callback', dest='subtitleslang', metavar='LANGS', type='str',
+            default=[], callback=_comma_separated_values_options_callback,
+            help='languages of the subtitles to download (optional) separated by commas, use IETF language tags like \'en,pt\'')
 
     downloader.add_option('-r', '--rate-limit',
             dest='ratelimit', metavar='LIMIT', help='maximum download rate (e.g. 50k or 44.6m)')
@@ -573,7 +577,7 @@ def _real_main(argv=None):
         'allsubtitles': opts.allsubtitles,
         'listsubtitles': opts.listsubtitles,
         'subtitlesformat': opts.subtitlesformat,
-        'subtitleslang': opts.subtitleslang,
+        'subtitleslangs': opts.subtitleslang,
         'matchtitle': decodeOption(opts.matchtitle),
         'rejecttitle': decodeOption(opts.rejecttitle),
         'max_downloads': opts.max_downloads,
index 446d53f644114e40915a821d63b0d61d6ed19d0d..5f843a8713bed7036b4c9035d85634955a1c3c1c 100644 (file)
@@ -496,7 +496,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
     def _request_automatic_caption(self, video_id, webpage):
         """We need the webpage for getting the captions url, pass it as an
            argument to speed up the process."""
-        sub_lang = self._downloader.params.get('subtitleslang') or 'en'
+        sub_lang = (self._downloader.params.get('subtitleslangs') or ['en'])[0]
         sub_format = self._downloader.params.get('subtitlesformat')
         self.to_screen(u'%s: Looking for automatic captions' % video_id)
         mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
@@ -530,23 +530,26 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         Return a dictionary: {language: subtitles} or {} if the subtitles
         couldn't be found
         """
-        sub_lang_list = self._get_available_subtitles(video_id)
+        available_subs_list = self._get_available_subtitles(video_id)
         sub_format = self._downloader.params.get('subtitlesformat')
-        if  not sub_lang_list: #There was some error, it didn't get the available subtitles
+        if  not available_subs_list: #There was some error, it didn't get the available subtitles
             return {}
         if self._downloader.params.get('allsubtitles', False):
-            pass
+            sub_lang_list = available_subs_list
         else:
-            if self._downloader.params.get('subtitleslang', False):
-                sub_lang = self._downloader.params.get('subtitleslang')
-            elif 'en' in sub_lang_list:
-                sub_lang = 'en'
+            if self._downloader.params.get('subtitleslangs', False):
+                reqested_langs = self._downloader.params.get('subtitleslangs')
+            elif 'en' in available_subs_list:
+                reqested_langs = ['en']
             else:
-                sub_lang = list(sub_lang_list.keys())[0]
-            if not sub_lang in sub_lang_list:
-                self._downloader.report_warning(u'no closed captions found in the specified language "%s"' % sub_lang)
-                return {}
-            sub_lang_list = {sub_lang: sub_lang_list[sub_lang]}
+                reqested_langs = [list(available_subs_list.keys())[0]]
+
+            sub_lang_list = {}
+            for sub_lang in reqested_langs:
+                if not sub_lang in available_subs_list:
+                    self._downloader.report_warning(u'no closed captions found in the specified language "%s"' % sub_lang)
+                    continue
+                sub_lang_list[sub_lang] = available_subs_list[sub_lang]
         subtitles = {}
         for sub_lang in sub_lang_list:
             subtitle = self._request_subtitle(sub_lang, sub_lang_list[sub_lang].encode('utf-8'), video_id, sub_format)