[theplatform] Convert to new subtitles system
[youtube-dl] / test / test_subtitles.py
1 #!/usr/bin/env python
2 from __future__ import unicode_literals
3
4 # Allow direct execution
5 import os
6 import sys
7 import unittest
8 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
9
10 from test.helper import FakeYDL, md5
11
12
13 from youtube_dl.extractor import (
14     BlipTVIE,
15     YoutubeIE,
16     DailymotionIE,
17     TEDIE,
18     VimeoIE,
19     WallaIE,
20     CeskaTelevizeIE,
21     LyndaIE,
22     NPOIE,
23     ComedyCentralIE,
24     NRKTVIE,
25     RaiIE,
26     VikiIE,
27     ThePlatformIE,
28 )
29
30
class BaseTestSubtitles(unittest.TestCase):
    """Shared fixture for the per-extractor subtitle test cases.

    Subclasses set ``url`` (what to extract) and ``IE`` (the info extractor
    class that is instantiated and registered on the FakeYDL in ``setUp``).
    """
    url = None
    IE = None

    def setUp(self):
        # Build a new downloader and extractor for every test, so params set
        # by one test are not carried over to another.
        self.DL = FakeYDL()
        self.ie = self.IE()
        self.DL.add_info_extractor(self.ie)

    def getInfoDict(self):
        """Return the result of extracting ``self.url`` with download=False."""
        return self.DL.extract_info(self.url, download=False)

    def getSubtitles(self):
        """Return a ``{language: subtitle text}`` dict for the requested subtitles.

        The 'requested_subtitles' entry of the info dict is used. If it is
        falsy (e.g. None or empty) it is returned unchanged. Entries whose
        'data' is missing or None are fetched from their 'url' and decoded as
        UTF-8; the fetched text is also stored back into the entry.
        """
        subtitles = self.getInfoDict()['requested_subtitles']
        if not subtitles:
            return subtitles
        for sub_info in subtitles.values():
            if sub_info.get('data') is None:
                uf = self.DL.urlopen(sub_info['url'])
                try:
                    sub_info['data'] = uf.read().decode('utf-8')
                finally:
                    # Always release the response handle, even if reading or
                    # decoding fails.
                    uf.close()
        return dict((lang, sub_info['data']) for lang, sub_info in subtitles.items())
54
55
class TestYoutubeSubtitles(BaseTestSubtitles):
    """Subtitle and automatic-caption extraction tests for YoutubeIE."""
    url = 'QRS8MkLhQmM'
    IE = YoutubeIE

    def test_youtube_subtitles(self):
        # Only 'writesubtitles' is set; the 'en' track is compared to a known MD5.
        self.DL.params['writesubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertEqual(md5(subtitles['en']), '4cd9278a35ba2305f47354ee13472260')

    def test_youtube_subtitles_lang(self):
        # Explicitly request the Italian track.
        self.DL.params['writesubtitles'] = True
        self.DL.params['subtitleslangs'] = ['it']
        subtitles = self.getSubtitles()
        self.assertEqual(md5(subtitles['it']), '164a51f16f260476a05b50fe4c2f161d')

    def test_youtube_allsubtitles(self):
        # With 'allsubtitles' the test video is expected to yield 13 languages.
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertEqual(len(subtitles), 13)

    def test_youtube_subtitles_sbv_format(self):
        self.DL.params['writesubtitles'] = True
        self.DL.params['subtitlesformat'] = 'sbv'
        subtitles = self.getSubtitles()
        self.assertEqual(md5(subtitles['en']), '13aeaa0c245a8bed9a451cb643e3ad8b')

    def test_youtube_subtitles_vtt_format(self):
        self.DL.params['writesubtitles'] = True
        self.DL.params['subtitlesformat'] = 'vtt'
        subtitles = self.getSubtitles()
        self.assertEqual(md5(subtitles['en']), '3cb210999d3e021bd6c7f0ea751eab06')

    def test_youtube_list_subtitles(self):
        # With 'listsubtitles' set, extraction is expected to return None.
        self.DL.expect_warning('Video doesn\'t have automatic captions')
        self.DL.params['listsubtitles'] = True
        info_dict = self.getInfoDict()
        self.assertEqual(info_dict, None)

    def test_youtube_automatic_captions(self):
        self.url = '8YoUxe5ncPo'
        self.DL.params['writeautomaticsub'] = True
        self.DL.params['subtitleslangs'] = ['it']
        subtitles = self.getSubtitles()
        self.assertTrue(subtitles['it'] is not None)

    def test_youtube_translated_subtitles(self):
        # This video has a subtitles track, which can be translated
        self.url = 'Ky9eprVWzlI'
        self.DL.params['writeautomaticsub'] = True
        self.DL.params['subtitleslangs'] = ['it']
        subtitles = self.getSubtitles()
        self.assertTrue(subtitles['it'] is not None)

    def test_youtube_nosubtitles(self):
        self.DL.expect_warning('video doesn\'t have subtitles')
        self.url = 'n5BB19UTcdA'
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        # getSubtitles() hands back a falsy 'requested_subtitles' value as-is,
        # so this may be None rather than an empty dict; len() would raise on
        # None, whereas assertFalse accepts both.
        self.assertFalse(subtitles)

    def test_youtube_multiple_langs(self):
        self.url = 'QRS8MkLhQmM'
        self.DL.params['writesubtitles'] = True
        langs = ['it', 'fr', 'de']
        self.DL.params['subtitleslangs'] = langs
        subtitles = self.getSubtitles()
        for lang in langs:
            self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang)
126
127
class TestDailymotionSubtitles(BaseTestSubtitles):
    """Subtitle extraction tests for DailymotionIE."""
    url = 'http://www.dailymotion.com/video/xczg00'
    IE = DailymotionIE

    def test_subtitles(self):
        # Only 'writesubtitles' is set; the 'en' track is compared to a known MD5.
        self.DL.params['writesubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertEqual(md5(subtitles['en']), '976553874490cba125086bbfea3ff76f')

    def test_subtitles_lang(self):
        self.DL.params['writesubtitles'] = True
        self.DL.params['subtitleslangs'] = ['fr']
        subtitles = self.getSubtitles()
        self.assertEqual(md5(subtitles['fr']), '594564ec7d588942e384e920e5341792')

    def test_allsubtitles(self):
        # A lower bound is asserted rather than an exact language count.
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertTrue(len(subtitles) >= 6)

    def test_nosubtitles(self):
        self.DL.expect_warning('video doesn\'t have subtitles')
        self.url = 'http://www.dailymotion.com/video/x12u166_le-zapping-tele-star-du-08-aout-2013_tv'
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        # getSubtitles() hands back a falsy 'requested_subtitles' value as-is,
        # so this may be None rather than an empty dict; len() would raise on
        # None, whereas assertFalse accepts both.
        self.assertFalse(subtitles)

    def test_multiple_langs(self):
        self.DL.params['writesubtitles'] = True
        langs = ['es', 'fr', 'de']
        self.DL.params['subtitleslangs'] = langs
        subtitles = self.getSubtitles()
        for lang in langs:
            self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang)
164
165
class TestTedSubtitles(BaseTestSubtitles):
    """Subtitle extraction tests for TEDIE."""
    IE = TEDIE
    url = 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html'

    def test_no_writesubtitles(self):
        # No subtitle-related option is set, so nothing should come back.
        found = self.getSubtitles()
        self.assertFalse(found)

    def test_subtitles(self):
        params = self.DL.params
        params['writesubtitles'] = True
        found = self.getSubtitles()
        english_digest = md5(found['en'])
        self.assertEqual(english_digest, '4262c1665ff928a2dada178f62cb8d14')

    def test_subtitles_lang(self):
        params = self.DL.params
        params['writesubtitles'] = True
        params['subtitleslangs'] = ['fr']
        found = self.getSubtitles()
        french_digest = md5(found['fr'])
        self.assertEqual(french_digest, '66a63f7f42c97a50f8c0e90bc7797bb5')

    def test_allsubtitles(self):
        # A lower bound is asserted rather than an exact language count.
        params = self.DL.params
        params['writesubtitles'] = True
        params['allsubtitles'] = True
        found = self.getSubtitles()
        language_count = len(found.keys())
        self.assertTrue(language_count >= 28)

    def test_list_subtitles(self):
        # With 'listsubtitles' set, extraction is expected to return None.
        self.DL.params['listsubtitles'] = True
        result = self.getInfoDict()
        self.assertEqual(result, None)

    def test_multiple_langs(self):
        wanted = ['es', 'fr', 'de']
        params = self.DL.params
        params['writesubtitles'] = True
        params['subtitleslangs'] = wanted
        found = self.getSubtitles()
        for code in wanted:
            present = found.get(code) is not None
            self.assertTrue(present, "Subtitles for '%s' not extracted" % code)
203
204
class TestBlipTVSubtitles(BaseTestSubtitles):
    """Subtitle extraction tests for BlipTVIE."""
    IE = BlipTVIE
    url = 'http://blip.tv/a/a-6603250'

    def test_list_subtitles(self):
        # With 'listsubtitles' set, extraction is expected to return None.
        self.DL.params['listsubtitles'] = True
        result = self.getInfoDict()
        self.assertEqual(result, None)

    def test_allsubtitles(self):
        # Exactly one track ('en') is expected, with known content.
        params = self.DL.params
        params['writesubtitles'] = True
        params['allsubtitles'] = True
        found = self.getSubtitles()
        expected_langs = set(['en'])
        self.assertEqual(set(found.keys()), expected_langs)
        english_digest = md5(found['en'])
        self.assertEqual(english_digest, '5b75c300af65fe4476dff79478bb93e4')
220
221
class TestVimeoSubtitles(BaseTestSubtitles):
    """Subtitle extraction tests for VimeoIE."""
    url = 'http://vimeo.com/76979871'
    IE = VimeoIE

    def test_subtitles(self):
        # Only 'writesubtitles' is set; the 'en' track is compared to a known MD5.
        self.DL.params['writesubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertEqual(md5(subtitles['en']), '8062383cf4dec168fc40a088aa6d5888')

    def test_subtitles_lang(self):
        self.DL.params['writesubtitles'] = True
        self.DL.params['subtitleslangs'] = ['fr']
        subtitles = self.getSubtitles()
        self.assertEqual(md5(subtitles['fr']), 'b6191146a6c5d3a452244d853fde6dc8')

    def test_allsubtitles(self):
        # The exact set of available languages is asserted.
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertEqual(set(subtitles.keys()), set(['de', 'en', 'es', 'fr']))

    def test_nosubtitles(self):
        self.DL.expect_warning('video doesn\'t have subtitles')
        self.url = 'http://vimeo.com/56015672'
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        # getSubtitles() hands back a falsy 'requested_subtitles' value as-is,
        # so this may be None rather than an empty dict; len() would raise on
        # None, whereas assertFalse accepts both.
        self.assertFalse(subtitles)

    def test_multiple_langs(self):
        self.DL.params['writesubtitles'] = True
        langs = ['es', 'fr', 'de']
        self.DL.params['subtitleslangs'] = langs
        subtitles = self.getSubtitles()
        for lang in langs:
            self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang)
258
259
class TestWallaSubtitles(BaseTestSubtitles):
    """Subtitle extraction tests for WallaIE."""
    url = 'http://vod.walla.co.il/movie/2705958/the-yes-men'
    IE = WallaIE

    def test_list_subtitles(self):
        # With 'listsubtitles' set, extraction is expected to return None.
        self.DL.expect_warning('Automatic Captions not supported by this server')
        self.DL.params['listsubtitles'] = True
        info_dict = self.getInfoDict()
        self.assertEqual(info_dict, None)

    def test_allsubtitles(self):
        # Exactly one track ('heb') is expected, with known content.
        self.DL.expect_warning('Automatic Captions not supported by this server')
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertEqual(set(subtitles.keys()), set(['heb']))
        self.assertEqual(md5(subtitles['heb']), 'e758c5d7cb982f6bef14f377ec7a3920')

    def test_nosubtitles(self):
        self.DL.expect_warning('video doesn\'t have subtitles')
        self.url = 'http://vod.walla.co.il/movie/2642630/one-direction-all-for-one'
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        # getSubtitles() hands back a falsy 'requested_subtitles' value as-is,
        # so this may be None rather than an empty dict; len() would raise on
        # None, whereas assertFalse accepts both.
        self.assertFalse(subtitles)
285
286
class TestCeskaTelevizeSubtitles(BaseTestSubtitles):
    """Subtitle extraction tests for CeskaTelevizeIE."""
    url = 'http://www.ceskatelevize.cz/ivysilani/10600540290-u6-uzasny-svet-techniky'
    IE = CeskaTelevizeIE

    def test_list_subtitles(self):
        # With 'listsubtitles' set, extraction is expected to return None.
        self.DL.expect_warning('Automatic Captions not supported by this server')
        self.DL.params['listsubtitles'] = True
        info_dict = self.getInfoDict()
        self.assertEqual(info_dict, None)

    def test_allsubtitles(self):
        # Only the track's presence and a minimum length are checked here,
        # not an exact content hash.
        self.DL.expect_warning('Automatic Captions not supported by this server')
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertEqual(set(subtitles.keys()), set(['cs']))
        self.assertTrue(len(subtitles['cs']) > 20000)

    def test_nosubtitles(self):
        self.DL.expect_warning('video doesn\'t have subtitles')
        self.url = 'http://www.ceskatelevize.cz/ivysilani/ivysilani/10441294653-hyde-park-civilizace/214411058091220'
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        # getSubtitles() hands back a falsy 'requested_subtitles' value as-is,
        # so this may be None rather than an empty dict; len() would raise on
        # None, whereas assertFalse accepts both.
        self.assertFalse(subtitles)
312
313
class TestLyndaSubtitles(BaseTestSubtitles):
    """Subtitle extraction tests for LyndaIE."""
    IE = LyndaIE
    url = 'http://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html'

    def test_allsubtitles(self):
        # Exactly one track ('en') is expected, with known content.
        params = self.DL.params
        params['writesubtitles'] = True
        params['allsubtitles'] = True
        found = self.getSubtitles()
        expected_langs = set(['en'])
        self.assertEqual(set(found.keys()), expected_langs)
        english_digest = md5(found['en'])
        self.assertEqual(english_digest, '09bbe67222259bed60deaa26997d73a7')
324
325
class TestNPOSubtitles(BaseTestSubtitles):
    """Subtitle extraction tests for NPOIE."""
    IE = NPOIE
    url = 'http://www.npo.nl/nos-journaal/28-08-2014/POW_00722860'

    def test_allsubtitles(self):
        # Exactly one track ('nl') is expected, with known content.
        params = self.DL.params
        params['writesubtitles'] = True
        params['allsubtitles'] = True
        found = self.getSubtitles()
        expected_langs = set(['nl'])
        self.assertEqual(set(found.keys()), expected_langs)
        dutch_digest = md5(found['nl'])
        self.assertEqual(dutch_digest, 'fc6435027572b63fb4ab143abd5ad3f4')
336
337
class TestMTVSubtitles(BaseTestSubtitles):
    """Subtitle extraction tests run through ComedyCentralIE."""
    IE = ComedyCentralIE
    url = 'http://www.cc.com/video-clips/kllhuv/stand-up-greg-fitzsimmons--uncensored---too-good-of-a-mother'

    def getInfoDict(self):
        # The base result carries an 'entries' list; the tests here inspect
        # its first entry only.
        full_result = super(TestMTVSubtitles, self).getInfoDict()
        first_entry = full_result['entries'][0]
        return first_entry

    def test_allsubtitles(self):
        # Exactly one track ('en') is expected, with known content.
        params = self.DL.params
        params['writesubtitles'] = True
        params['allsubtitles'] = True
        found = self.getSubtitles()
        expected_langs = set(['en'])
        self.assertEqual(set(found.keys()), expected_langs)
        english_digest = md5(found['en'])
        self.assertEqual(english_digest, 'b9f6ca22a6acf597ec76f61749765e65')
351
352
class TestNRKSubtitles(BaseTestSubtitles):
    """Subtitle extraction tests for NRKTVIE."""
    IE = NRKTVIE
    url = 'http://tv.nrk.no/serie/ikke-gjoer-dette-hjemme/DMPV73000411/sesong-2/episode-1'

    def test_allsubtitles(self):
        # Exactly one track ('no') is expected, with known content.
        params = self.DL.params
        params['writesubtitles'] = True
        params['allsubtitles'] = True
        found = self.getSubtitles()
        expected_langs = set(['no'])
        self.assertEqual(set(found.keys()), expected_langs)
        norwegian_digest = md5(found['no'])
        self.assertEqual(norwegian_digest, '1d221e6458c95c5494dcd38e6a1f129a')
363
364
class TestRaiSubtitles(BaseTestSubtitles):
    """Subtitle extraction tests for RaiIE."""
    IE = RaiIE
    url = 'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-cb27157f-9dd0-4aee-b788-b1f67643a391.html'

    def test_allsubtitles(self):
        # Exactly one track ('it') is expected, with known content.
        params = self.DL.params
        params['writesubtitles'] = True
        params['allsubtitles'] = True
        found = self.getSubtitles()
        expected_langs = set(['it'])
        self.assertEqual(set(found.keys()), expected_langs)
        italian_digest = md5(found['it'])
        self.assertEqual(italian_digest, 'b1d90a98755126b61e667567a1f6680a')
375
376
class TestVikiSubtitles(BaseTestSubtitles):
    """Subtitle extraction tests for VikiIE."""
    IE = VikiIE
    url = 'http://www.viki.com/videos/1060846v-punch-episode-18'

    def test_allsubtitles(self):
        # Exactly one track ('en') is expected, with known content.
        params = self.DL.params
        params['writesubtitles'] = True
        params['allsubtitles'] = True
        found = self.getSubtitles()
        expected_langs = set(['en'])
        self.assertEqual(set(found.keys()), expected_langs)
        english_digest = md5(found['en'])
        self.assertEqual(english_digest, 'b0b781eeb45efd3f6398a925b259150b')
387
388
class TestThePlatformSubtitles(BaseTestSubtitles):
    """Subtitle extraction tests for ThePlatformIE."""
    # The test video comes from
    # http://www.3playmedia.com/services-features/tools/integrations/theplatform/
    # (see http://theplatform.com/about/partners/type/subtitles-closed-captioning/)
    IE = ThePlatformIE
    url = 'theplatform:JFUjUE1_ehvq'

    def test_allsubtitles(self):
        # Exactly one track ('en') is expected, with known content.
        params = self.DL.params
        params['writesubtitles'] = True
        params['allsubtitles'] = True
        found = self.getSubtitles()
        expected_langs = set(['en'])
        self.assertEqual(set(found.keys()), expected_langs)
        english_digest = md5(found['en'])
        self.assertEqual(english_digest, '97e7670cbae3c4d26ae8bcc7fdd78d4b')
401
402
# Run every test case in this module when the file is executed directly.
if __name__ == '__main__':
    unittest.main()