[lynda] Convert to new subtitles system
[youtube-dl] / test / test_subtitles.py
1 #!/usr/bin/env python
2 from __future__ import unicode_literals
3
4 # Allow direct execution
5 import os
6 import sys
7 import unittest
8 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
9
10 from test.helper import FakeYDL, md5
11
12
13 from youtube_dl.extractor import (
14     BlipTVIE,
15     YoutubeIE,
16     DailymotionIE,
17     TEDIE,
18     VimeoIE,
19     WallaIE,
20     CeskaTelevizeIE,
21     LyndaIE,
22 )
23
24
class BaseTestSubtitles(unittest.TestCase):
    """Shared fixture for the per-site subtitle extraction tests.

    Subclasses set ``url`` to the video to extract and ``IE`` to the
    extractor class to register with the downloader.
    """

    # Passed to extract_info(); overridden by subclasses (and by some tests).
    url = None
    # Extractor class instantiated in setUp(); overridden by subclasses.
    IE = None

    def setUp(self):
        """Create a fresh FakeYDL and register one instance of ``IE`` on it."""
        self.DL = FakeYDL()
        self.ie = self.IE()
        self.DL.add_info_extractor(self.ie)

    def getInfoDict(self):
        """Return the result of ``extract_info`` for ``self.url`` (no download)."""
        return self.DL.extract_info(self.url, download=False)

    def getSubtitles(self):
        """Return the requested subtitles as a ``{language: text}`` dict.

        Reads ``info_dict['requested_subtitles']``.  If that value is falsy
        (``None`` or empty) it is returned unchanged.  Otherwise every entry
        without inline ``'data'`` is fetched from its ``'url'`` and decoded as
        UTF-8; the fetched text is also stored back on the entry.
        """
        info_dict = self.getInfoDict()
        subtitles = info_dict['requested_subtitles']
        if not subtitles:
            return subtitles
        for sub_info in subtitles.values():
            if sub_info.get('data') is None:
                uf = self.DL.urlopen(sub_info['url'])
                # Release the connection even if reading or decoding fails.
                # try/finally rather than `with`, because response objects
                # are not context managers on every supported Python.
                try:
                    sub_info['data'] = uf.read().decode('utf-8')
                finally:
                    uf.close()
        return dict(
            (lang, sub_info['data']) for lang, sub_info in subtitles.items())
48
49
class TestYoutubeSubtitles(BaseTestSubtitles):
    """Subtitle and automatic-caption extraction through YoutubeIE."""

    url = 'QRS8MkLhQmM'
    IE = YoutubeIE

    def test_youtube_subtitles(self):
        # Only 'writesubtitles' is set; the 'en' track is compared by digest.
        params = self.DL.params
        params['writesubtitles'] = True
        tracks = self.getSubtitles()
        digest = md5(tracks['en'])
        self.assertEqual(digest, '4cd9278a35ba2305f47354ee13472260')

    def test_youtube_subtitles_lang(self):
        # Restrict the request to a single language via 'subtitleslangs'.
        params = self.DL.params
        params['writesubtitles'] = True
        params['subtitleslangs'] = ['it']
        tracks = self.getSubtitles()
        digest = md5(tracks['it'])
        self.assertEqual(digest, '164a51f16f260476a05b50fe4c2f161d')

    def test_youtube_allsubtitles(self):
        # With 'allsubtitles' the number of returned languages is pinned.
        params = self.DL.params
        params['writesubtitles'] = True
        params['allsubtitles'] = True
        tracks = self.getSubtitles()
        lang_codes = tracks.keys()
        self.assertEqual(len(lang_codes), 13)

    def test_youtube_subtitles_sbv_format(self):
        # Request the 'sbv' subtitle format.
        params = self.DL.params
        params['writesubtitles'] = True
        params['subtitlesformat'] = 'sbv'
        tracks = self.getSubtitles()
        digest = md5(tracks['en'])
        self.assertEqual(digest, '13aeaa0c245a8bed9a451cb643e3ad8b')

    def test_youtube_subtitles_vtt_format(self):
        # Request the 'vtt' subtitle format.
        params = self.DL.params
        params['writesubtitles'] = True
        params['subtitlesformat'] = 'vtt'
        tracks = self.getSubtitles()
        digest = md5(tracks['en'])
        self.assertEqual(digest, '3cb210999d3e021bd6c7f0ea751eab06')

    def test_youtube_list_subtitles(self):
        # With 'listsubtitles' set, extraction is expected to return None.
        self.DL.expect_warning("Video doesn't have automatic captions")
        self.DL.params['listsubtitles'] = True
        info = self.getInfoDict()
        self.assertEqual(info, None)

    def test_youtube_automatic_captions(self):
        self.url = '8YoUxe5ncPo'
        params = self.DL.params
        params['writeautomaticsub'] = True
        params['subtitleslangs'] = ['it']
        tracks = self.getSubtitles()
        italian = tracks['it']
        self.assertTrue(italian is not None)

    def test_youtube_translated_subtitles(self):
        # This video has a subtitles track, which can be translated
        self.url = 'Ky9eprVWzlI'
        params = self.DL.params
        params['writeautomaticsub'] = True
        params['subtitleslangs'] = ['it']
        tracks = self.getSubtitles()
        italian = tracks['it']
        self.assertTrue(italian is not None)

    def test_youtube_nosubtitles(self):
        # A video without subtitles should warn and yield nothing.
        self.DL.expect_warning("video doesn't have subtitles")
        self.url = 'n5BB19UTcdA'
        params = self.DL.params
        params['writesubtitles'] = True
        params['allsubtitles'] = True
        tracks = self.getSubtitles()
        self.assertEqual(len(tracks), 0)

    def test_youtube_multiple_langs(self):
        # Every explicitly requested language must come back.
        wanted = ['it', 'fr', 'de']
        self.url = 'QRS8MkLhQmM'
        params = self.DL.params
        params['writesubtitles'] = True
        params['subtitleslangs'] = wanted
        tracks = self.getSubtitles()
        for code in wanted:
            self.assertTrue(
                tracks.get(code) is not None,
                "Subtitles for '%s' not extracted" % code)
120
121
class TestDailymotionSubtitles(BaseTestSubtitles):
    """Subtitle extraction through DailymotionIE."""

    url = 'http://www.dailymotion.com/video/xczg00'
    IE = DailymotionIE

    def test_subtitles(self):
        # Only 'writesubtitles' is set; the 'en' track is compared by digest.
        params = self.DL.params
        params['writesubtitles'] = True
        tracks = self.getSubtitles()
        digest = md5(tracks['en'])
        self.assertEqual(digest, '976553874490cba125086bbfea3ff76f')

    def test_subtitles_lang(self):
        # Restrict the request to a single language via 'subtitleslangs'.
        params = self.DL.params
        params['writesubtitles'] = True
        params['subtitleslangs'] = ['fr']
        tracks = self.getSubtitles()
        digest = md5(tracks['fr'])
        self.assertEqual(digest, '594564ec7d588942e384e920e5341792')

    def test_allsubtitles(self):
        # Only a lower bound on the number of languages is asserted.
        params = self.DL.params
        params['writesubtitles'] = True
        params['allsubtitles'] = True
        tracks = self.getSubtitles()
        lang_count = len(tracks.keys())
        self.assertTrue(lang_count >= 6)

    def test_nosubtitles(self):
        # A video without subtitles should warn and yield nothing.
        self.DL.expect_warning("video doesn't have subtitles")
        self.url = 'http://www.dailymotion.com/video/x12u166_le-zapping-tele-star-du-08-aout-2013_tv'
        params = self.DL.params
        params['writesubtitles'] = True
        params['allsubtitles'] = True
        tracks = self.getSubtitles()
        self.assertEqual(len(tracks), 0)

    def test_multiple_langs(self):
        # Every explicitly requested language must come back.
        wanted = ['es', 'fr', 'de']
        params = self.DL.params
        params['writesubtitles'] = True
        params['subtitleslangs'] = wanted
        tracks = self.getSubtitles()
        for code in wanted:
            self.assertTrue(
                tracks.get(code) is not None,
                "Subtitles for '%s' not extracted" % code)
158
159
class TestTedSubtitles(BaseTestSubtitles):
    """Subtitle extraction through TEDIE."""

    url = 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html'
    IE = TEDIE

    def test_no_writesubtitles(self):
        # No subtitle option is set, so nothing should be requested.
        tracks = self.getSubtitles()
        self.assertFalse(tracks)

    def test_subtitles(self):
        # Only 'writesubtitles' is set; the 'en' track is compared by digest.
        params = self.DL.params
        params['writesubtitles'] = True
        tracks = self.getSubtitles()
        digest = md5(tracks['en'])
        self.assertEqual(digest, '4262c1665ff928a2dada178f62cb8d14')

    def test_subtitles_lang(self):
        # Restrict the request to a single language via 'subtitleslangs'.
        params = self.DL.params
        params['writesubtitles'] = True
        params['subtitleslangs'] = ['fr']
        tracks = self.getSubtitles()
        digest = md5(tracks['fr'])
        self.assertEqual(digest, '66a63f7f42c97a50f8c0e90bc7797bb5')

    def test_allsubtitles(self):
        # Only a lower bound on the number of languages is asserted.
        params = self.DL.params
        params['writesubtitles'] = True
        params['allsubtitles'] = True
        tracks = self.getSubtitles()
        lang_count = len(tracks.keys())
        self.assertTrue(lang_count >= 28)

    def test_list_subtitles(self):
        # With 'listsubtitles' set, extraction is expected to return None.
        self.DL.params['listsubtitles'] = True
        info = self.getInfoDict()
        self.assertEqual(info, None)

    def test_multiple_langs(self):
        # Every explicitly requested language must come back.
        wanted = ['es', 'fr', 'de']
        params = self.DL.params
        params['writesubtitles'] = True
        params['subtitleslangs'] = wanted
        tracks = self.getSubtitles()
        for code in wanted:
            self.assertTrue(
                tracks.get(code) is not None,
                "Subtitles for '%s' not extracted" % code)
197
198
class TestBlipTVSubtitles(BaseTestSubtitles):
    """Subtitle extraction through BlipTVIE."""

    url = 'http://blip.tv/a/a-6603250'
    IE = BlipTVIE

    def test_list_subtitles(self):
        # With 'listsubtitles' set, extraction is expected to return None.
        self.DL.params['listsubtitles'] = True
        info = self.getInfoDict()
        self.assertEqual(info, None)

    def test_allsubtitles(self):
        # Exactly one language ('en') is expected, with a pinned digest.
        params = self.DL.params
        params['writesubtitles'] = True
        params['allsubtitles'] = True
        tracks = self.getSubtitles()
        found_langs = set(tracks.keys())
        self.assertEqual(found_langs, set(['en']))
        digest = md5(tracks['en'])
        self.assertEqual(digest, '5b75c300af65fe4476dff79478bb93e4')
214
215
class TestVimeoSubtitles(BaseTestSubtitles):
    """Subtitle extraction through VimeoIE."""

    url = 'http://vimeo.com/76979871'
    IE = VimeoIE

    def test_subtitles(self):
        # Only 'writesubtitles' is set; the 'en' track is compared by digest.
        params = self.DL.params
        params['writesubtitles'] = True
        tracks = self.getSubtitles()
        digest = md5(tracks['en'])
        self.assertEqual(digest, '8062383cf4dec168fc40a088aa6d5888')

    def test_subtitles_lang(self):
        # Restrict the request to a single language via 'subtitleslangs'.
        params = self.DL.params
        params['writesubtitles'] = True
        params['subtitleslangs'] = ['fr']
        tracks = self.getSubtitles()
        digest = md5(tracks['fr'])
        self.assertEqual(digest, 'b6191146a6c5d3a452244d853fde6dc8')

    def test_allsubtitles(self):
        # The exact set of available languages is pinned.
        params = self.DL.params
        params['writesubtitles'] = True
        params['allsubtitles'] = True
        tracks = self.getSubtitles()
        found_langs = set(tracks.keys())
        self.assertEqual(found_langs, set(['de', 'en', 'es', 'fr']))

    def test_nosubtitles(self):
        # A video without subtitles should warn and yield nothing.
        self.DL.expect_warning("video doesn't have subtitles")
        self.url = 'http://vimeo.com/56015672'
        params = self.DL.params
        params['writesubtitles'] = True
        params['allsubtitles'] = True
        tracks = self.getSubtitles()
        self.assertEqual(len(tracks), 0)

    def test_multiple_langs(self):
        # Every explicitly requested language must come back.
        wanted = ['es', 'fr', 'de']
        params = self.DL.params
        params['writesubtitles'] = True
        params['subtitleslangs'] = wanted
        tracks = self.getSubtitles()
        for code in wanted:
            self.assertTrue(
                tracks.get(code) is not None,
                "Subtitles for '%s' not extracted" % code)
252
253
class TestWallaSubtitles(BaseTestSubtitles):
    """Subtitle extraction through WallaIE."""

    url = 'http://vod.walla.co.il/movie/2705958/the-yes-men'
    IE = WallaIE

    def test_list_subtitles(self):
        # With 'listsubtitles' set, extraction is expected to return None.
        self.DL.expect_warning('Automatic Captions not supported by this server')
        self.DL.params['listsubtitles'] = True
        info = self.getInfoDict()
        self.assertEqual(info, None)

    def test_allsubtitles(self):
        # Exactly one language ('heb') is expected, with a pinned digest.
        self.DL.expect_warning('Automatic Captions not supported by this server')
        params = self.DL.params
        params['writesubtitles'] = True
        params['allsubtitles'] = True
        tracks = self.getSubtitles()
        found_langs = set(tracks.keys())
        self.assertEqual(found_langs, set(['heb']))
        digest = md5(tracks['heb'])
        self.assertEqual(digest, 'e758c5d7cb982f6bef14f377ec7a3920')

    def test_nosubtitles(self):
        # A video without subtitles should warn and yield nothing.
        self.DL.expect_warning("video doesn't have subtitles")
        self.url = 'http://vod.walla.co.il/movie/2642630/one-direction-all-for-one'
        params = self.DL.params
        params['writesubtitles'] = True
        params['allsubtitles'] = True
        tracks = self.getSubtitles()
        self.assertEqual(len(tracks), 0)
279
280
class TestCeskaTelevizeSubtitles(BaseTestSubtitles):
    """Subtitle extraction through CeskaTelevizeIE."""

    url = 'http://www.ceskatelevize.cz/ivysilani/10600540290-u6-uzasny-svet-techniky'
    IE = CeskaTelevizeIE

    def test_list_subtitles(self):
        # With 'listsubtitles' set, extraction is expected to return None.
        self.DL.expect_warning('Automatic Captions not supported by this server')
        self.DL.params['listsubtitles'] = True
        info = self.getInfoDict()
        self.assertEqual(info, None)

    def test_allsubtitles(self):
        # Exactly one language ('cs') is expected; only its length is
        # bounded from below rather than pinned by digest.
        self.DL.expect_warning('Automatic Captions not supported by this server')
        params = self.DL.params
        params['writesubtitles'] = True
        params['allsubtitles'] = True
        tracks = self.getSubtitles()
        found_langs = set(tracks.keys())
        self.assertEqual(found_langs, set(['cs']))
        czech_length = len(tracks['cs'])
        self.assertTrue(czech_length > 20000)

    def test_nosubtitles(self):
        # A video without subtitles should warn and yield nothing.
        self.DL.expect_warning("video doesn't have subtitles")
        self.url = 'http://www.ceskatelevize.cz/ivysilani/ivysilani/10441294653-hyde-park-civilizace/214411058091220'
        params = self.DL.params
        params['writesubtitles'] = True
        params['allsubtitles'] = True
        tracks = self.getSubtitles()
        self.assertEqual(len(tracks), 0)
306
307
class TestLyndaSubtitles(BaseTestSubtitles):
    """Subtitle extraction through LyndaIE."""

    url = 'http://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html'
    IE = LyndaIE

    def test_allsubtitles(self):
        # Exactly one language ('en') is expected, with a pinned digest.
        params = self.DL.params
        params['writesubtitles'] = True
        params['allsubtitles'] = True
        tracks = self.getSubtitles()
        found_langs = set(tracks.keys())
        self.assertEqual(found_langs, set(['en']))
        digest = md5(tracks['en'])
        self.assertEqual(digest, '09bbe67222259bed60deaa26997d73a7')
318
319
# Allow direct execution: when run as a script, hand control to unittest's
# command-line runner.
if __name__ == '__main__':
    unittest.main()