[nrk] Convert to new subtitles system
[youtube-dl] / test / test_subtitles.py
1 #!/usr/bin/env python
2 from __future__ import unicode_literals
3
4 # Allow direct execution
5 import os
6 import sys
7 import unittest
8 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
9
10 from test.helper import FakeYDL, md5
11
12
13 from youtube_dl.extractor import (
14     BlipTVIE,
15     YoutubeIE,
16     DailymotionIE,
17     TEDIE,
18     VimeoIE,
19     WallaIE,
20     CeskaTelevizeIE,
21     LyndaIE,
22     NPOIE,
23     ComedyCentralIE,
24     NRKTVIE,
25 )
26
27
class BaseTestSubtitles(unittest.TestCase):
    """Common fixture for the per-extractor subtitle tests.

    Subclasses set ``url`` to the video under test and ``IE`` to the
    extractor class that is registered with the downloader.
    """
    url = None
    IE = None

    def setUp(self):
        # A new FakeYDL and a new extractor instance are built for every test.
        self.DL = FakeYDL()
        self.ie = self.IE()
        self.DL.add_info_extractor(self.ie)

    def getInfoDict(self):
        """Return the result of ``extract_info`` for ``self.url`` (no download)."""
        return self.DL.extract_info(self.url, download=False)

    def getSubtitles(self):
        """Return a dict mapping each requested language to its subtitle text.

        Entries of ``requested_subtitles`` that have no inline ``data`` are
        fetched from their ``url`` and decoded as UTF-8; the fetched text is
        stored back into the entry. A falsy ``requested_subtitles`` value is
        returned unchanged.
        """
        subtitles = self.getInfoDict()['requested_subtitles']
        if not subtitles:
            return subtitles
        for sub_info in subtitles.values():
            if sub_info.get('data') is None:
                uf = self.DL.urlopen(sub_info['url'])
                try:
                    sub_info['data'] = uf.read().decode('utf-8')
                finally:
                    # Close the response even when reading or decoding fails,
                    # so the test run does not leak open connections.
                    uf.close()
        return dict((lang, sub_info['data']) for lang, sub_info in subtitles.items())
51
52
class TestYoutubeSubtitles(BaseTestSubtitles):
    """Subtitle and automatic-caption checks run through YoutubeIE."""
    url = 'QRS8MkLhQmM'
    IE = YoutubeIE

    def test_youtube_subtitles(self):
        self.DL.params['writesubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertEqual(md5(subtitles['en']), '4cd9278a35ba2305f47354ee13472260')

    def test_youtube_subtitles_lang(self):
        self.DL.params['writesubtitles'] = True
        self.DL.params['subtitleslangs'] = ['it']
        subtitles = self.getSubtitles()
        self.assertEqual(md5(subtitles['it']), '164a51f16f260476a05b50fe4c2f161d')

    def test_youtube_allsubtitles(self):
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertEqual(len(subtitles), 13)

    def test_youtube_subtitles_sbv_format(self):
        self.DL.params['writesubtitles'] = True
        self.DL.params['subtitlesformat'] = 'sbv'
        subtitles = self.getSubtitles()
        self.assertEqual(md5(subtitles['en']), '13aeaa0c245a8bed9a451cb643e3ad8b')

    def test_youtube_subtitles_vtt_format(self):
        self.DL.params['writesubtitles'] = True
        self.DL.params['subtitlesformat'] = 'vtt'
        subtitles = self.getSubtitles()
        self.assertEqual(md5(subtitles['en']), '3cb210999d3e021bd6c7f0ea751eab06')

    def test_youtube_list_subtitles(self):
        self.DL.expect_warning('Video doesn\'t have automatic captions')
        self.DL.params['listsubtitles'] = True
        info_dict = self.getInfoDict()
        # With 'listsubtitles' set, no info dict is expected back.
        self.assertEqual(info_dict, None)

    def test_youtube_automatic_captions(self):
        self.url = '8YoUxe5ncPo'
        self.DL.params['writeautomaticsub'] = True
        self.DL.params['subtitleslangs'] = ['it']
        subtitles = self.getSubtitles()
        self.assertTrue(subtitles['it'] is not None)

    def test_youtube_translated_subtitles(self):
        # This video has a subtitles track, which can be translated
        self.url = 'Ky9eprVWzlI'
        self.DL.params['writeautomaticsub'] = True
        self.DL.params['subtitleslangs'] = ['it']
        subtitles = self.getSubtitles()
        self.assertTrue(subtitles['it'] is not None)

    def test_youtube_nosubtitles(self):
        self.DL.expect_warning('video doesn\'t have subtitles')
        self.url = 'n5BB19UTcdA'
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        # getSubtitles() returns a falsy 'requested_subtitles' value as-is, so
        # the result may be None rather than an empty dict; len() would then
        # raise TypeError instead of producing a test verdict.
        self.assertFalse(subtitles)

    def test_youtube_multiple_langs(self):
        self.url = 'QRS8MkLhQmM'
        self.DL.params['writesubtitles'] = True
        langs = ['it', 'fr', 'de']
        self.DL.params['subtitleslangs'] = langs
        subtitles = self.getSubtitles()
        for lang in langs:
            self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang)
123
124
class TestDailymotionSubtitles(BaseTestSubtitles):
    """Subtitle extraction checks run through DailymotionIE."""
    url = 'http://www.dailymotion.com/video/xczg00'
    IE = DailymotionIE

    def test_subtitles(self):
        self.DL.params['writesubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertEqual(md5(subtitles['en']), '976553874490cba125086bbfea3ff76f')

    def test_subtitles_lang(self):
        self.DL.params['writesubtitles'] = True
        self.DL.params['subtitleslangs'] = ['fr']
        subtitles = self.getSubtitles()
        self.assertEqual(md5(subtitles['fr']), '594564ec7d588942e384e920e5341792')

    def test_allsubtitles(self):
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        # Lower bound only: more languages than this are accepted.
        self.assertTrue(len(subtitles) >= 6)

    def test_nosubtitles(self):
        self.DL.expect_warning('video doesn\'t have subtitles')
        self.url = 'http://www.dailymotion.com/video/x12u166_le-zapping-tele-star-du-08-aout-2013_tv'
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        # getSubtitles() returns a falsy 'requested_subtitles' value as-is, so
        # the result may be None rather than an empty dict; len() would then
        # raise TypeError instead of producing a test verdict.
        self.assertFalse(subtitles)

    def test_multiple_langs(self):
        self.DL.params['writesubtitles'] = True
        langs = ['es', 'fr', 'de']
        self.DL.params['subtitleslangs'] = langs
        subtitles = self.getSubtitles()
        for lang in langs:
            self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang)
161
162
class TestTedSubtitles(BaseTestSubtitles):
    """Subtitle extraction checks for a TED talk handled by TEDIE."""
    IE = TEDIE
    url = 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html'

    def test_no_writesubtitles(self):
        # No subtitle-related params are set here; the result must be falsy.
        self.assertFalse(self.getSubtitles())

    def test_subtitles(self):
        self.DL.params['writesubtitles'] = True
        english = self.getSubtitles()['en']
        self.assertEqual(md5(english), '4262c1665ff928a2dada178f62cb8d14')

    def test_subtitles_lang(self):
        params = self.DL.params
        params['writesubtitles'] = True
        params['subtitleslangs'] = ['fr']
        french = self.getSubtitles()['fr']
        self.assertEqual(md5(french), '66a63f7f42c97a50f8c0e90bc7797bb5')

    def test_allsubtitles(self):
        for option in ('writesubtitles', 'allsubtitles'):
            self.DL.params[option] = True
        found = self.getSubtitles()
        # Lower bound only: more languages than this are accepted.
        self.assertTrue(len(found.keys()) >= 28)

    def test_list_subtitles(self):
        self.DL.params['listsubtitles'] = True
        self.assertEqual(self.getInfoDict(), None)

    def test_multiple_langs(self):
        wanted = ['es', 'fr', 'de']
        self.DL.params['writesubtitles'] = True
        self.DL.params['subtitleslangs'] = wanted
        extracted = self.getSubtitles()
        for code in wanted:
            self.assertTrue(
                extracted.get(code) is not None,
                'Subtitles for \'%s\' not extracted' % code)
200
201
class TestBlipTVSubtitles(BaseTestSubtitles):
    """Subtitle checks for a blip.tv video handled by BlipTVIE."""
    IE = BlipTVIE
    url = 'http://blip.tv/a/a-6603250'

    def test_list_subtitles(self):
        self.DL.params['listsubtitles'] = True
        self.assertEqual(self.getInfoDict(), None)

    def test_allsubtitles(self):
        for option in ('writesubtitles', 'allsubtitles'):
            self.DL.params[option] = True
        found = self.getSubtitles()
        # Exactly one language is expected, and its content is pinned by hash.
        languages = set(found.keys())
        self.assertEqual(languages, set(['en']))
        self.assertEqual(md5(found['en']), '5b75c300af65fe4476dff79478bb93e4')
217
218
class TestVimeoSubtitles(BaseTestSubtitles):
    """Subtitle extraction checks run through VimeoIE."""
    url = 'http://vimeo.com/76979871'
    IE = VimeoIE

    def test_subtitles(self):
        self.DL.params['writesubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertEqual(md5(subtitles['en']), '8062383cf4dec168fc40a088aa6d5888')

    def test_subtitles_lang(self):
        self.DL.params['writesubtitles'] = True
        self.DL.params['subtitleslangs'] = ['fr']
        subtitles = self.getSubtitles()
        self.assertEqual(md5(subtitles['fr']), 'b6191146a6c5d3a452244d853fde6dc8')

    def test_allsubtitles(self):
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertEqual(set(subtitles.keys()), set(['de', 'en', 'es', 'fr']))

    def test_nosubtitles(self):
        self.DL.expect_warning('video doesn\'t have subtitles')
        self.url = 'http://vimeo.com/56015672'
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        # getSubtitles() returns a falsy 'requested_subtitles' value as-is, so
        # the result may be None rather than an empty dict; len() would then
        # raise TypeError instead of producing a test verdict.
        self.assertFalse(subtitles)

    def test_multiple_langs(self):
        self.DL.params['writesubtitles'] = True
        langs = ['es', 'fr', 'de']
        self.DL.params['subtitleslangs'] = langs
        subtitles = self.getSubtitles()
        for lang in langs:
            self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang)
255
256
class TestWallaSubtitles(BaseTestSubtitles):
    """Subtitle extraction checks run through WallaIE."""
    url = 'http://vod.walla.co.il/movie/2705958/the-yes-men'
    IE = WallaIE

    def test_list_subtitles(self):
        self.DL.expect_warning('Automatic Captions not supported by this server')
        self.DL.params['listsubtitles'] = True
        info_dict = self.getInfoDict()
        self.assertEqual(info_dict, None)

    def test_allsubtitles(self):
        self.DL.expect_warning('Automatic Captions not supported by this server')
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertEqual(set(subtitles.keys()), set(['heb']))
        self.assertEqual(md5(subtitles['heb']), 'e758c5d7cb982f6bef14f377ec7a3920')

    def test_nosubtitles(self):
        self.DL.expect_warning('video doesn\'t have subtitles')
        self.url = 'http://vod.walla.co.il/movie/2642630/one-direction-all-for-one'
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        # getSubtitles() returns a falsy 'requested_subtitles' value as-is, so
        # the result may be None rather than an empty dict; len() would then
        # raise TypeError instead of producing a test verdict.
        self.assertFalse(subtitles)
282
283
class TestCeskaTelevizeSubtitles(BaseTestSubtitles):
    """Subtitle extraction checks run through CeskaTelevizeIE."""
    url = 'http://www.ceskatelevize.cz/ivysilani/10600540290-u6-uzasny-svet-techniky'
    IE = CeskaTelevizeIE

    def test_list_subtitles(self):
        self.DL.expect_warning('Automatic Captions not supported by this server')
        self.DL.params['listsubtitles'] = True
        info_dict = self.getInfoDict()
        self.assertEqual(info_dict, None)

    def test_allsubtitles(self):
        self.DL.expect_warning('Automatic Captions not supported by this server')
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertEqual(set(subtitles.keys()), set(['cs']))
        # Content is checked by a minimum length rather than an exact hash.
        self.assertTrue(len(subtitles['cs']) > 20000)

    def test_nosubtitles(self):
        self.DL.expect_warning('video doesn\'t have subtitles')
        self.url = 'http://www.ceskatelevize.cz/ivysilani/ivysilani/10441294653-hyde-park-civilizace/214411058091220'
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        # getSubtitles() returns a falsy 'requested_subtitles' value as-is, so
        # the result may be None rather than an empty dict; len() would then
        # raise TypeError instead of producing a test verdict.
        self.assertFalse(subtitles)
309
310
class TestLyndaSubtitles(BaseTestSubtitles):
    """Subtitle checks for a lynda.com tutorial handled by LyndaIE."""
    IE = LyndaIE
    url = 'http://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html'

    def test_allsubtitles(self):
        for option in ('writesubtitles', 'allsubtitles'):
            self.DL.params[option] = True
        found = self.getSubtitles()
        # Exactly one language is expected, and its content is pinned by hash.
        languages = set(found.keys())
        self.assertEqual(languages, set(['en']))
        self.assertEqual(md5(found['en']), '09bbe67222259bed60deaa26997d73a7')
321
322
class TestNPOSubtitles(BaseTestSubtitles):
    """Subtitle checks for an npo.nl broadcast handled by NPOIE."""
    IE = NPOIE
    url = 'http://www.npo.nl/nos-journaal/28-08-2014/POW_00722860'

    def test_allsubtitles(self):
        params = self.DL.params
        params['writesubtitles'] = True
        params['allsubtitles'] = True
        found = self.getSubtitles()
        # Exactly one language is expected, and its content is pinned by hash.
        languages = set(found.keys())
        self.assertEqual(languages, set(['nl']))
        self.assertEqual(md5(found['nl']), 'fc6435027572b63fb4ab143abd5ad3f4')
333
334
class TestMTVSubtitles(BaseTestSubtitles):
    """Subtitle checks for a cc.com clip handled by ComedyCentralIE."""
    IE = ComedyCentralIE
    url = 'http://www.cc.com/video-clips/kllhuv/stand-up-greg-fitzsimmons--uncensored---too-good-of-a-mother'

    def getInfoDict(self):
        # The base result is indexed by 'entries'; only its first item is
        # handed on to the tests.
        result = super(TestMTVSubtitles, self).getInfoDict()
        return result['entries'][0]

    def test_allsubtitles(self):
        for option in ('writesubtitles', 'allsubtitles'):
            self.DL.params[option] = True
        found = self.getSubtitles()
        languages = set(found.keys())
        self.assertEqual(languages, set(['en']))
        self.assertEqual(md5(found['en']), 'b9f6ca22a6acf597ec76f61749765e65')
348
349
class TestNRKSubtitles(BaseTestSubtitles):
    """Subtitle checks for a tv.nrk.no episode handled by NRKTVIE."""
    IE = NRKTVIE
    url = 'http://tv.nrk.no/serie/ikke-gjoer-dette-hjemme/DMPV73000411/sesong-2/episode-1'

    def test_allsubtitles(self):
        params = self.DL.params
        params['writesubtitles'] = True
        params['allsubtitles'] = True
        found = self.getSubtitles()
        # Exactly one language is expected, and its content is pinned by hash.
        languages = set(found.keys())
        self.assertEqual(languages, set(['no']))
        self.assertEqual(md5(found['no']), '1d221e6458c95c5494dcd38e6a1f129a')
360
361
# Run this module's tests when the file is executed directly.
if __name__ == '__main__':
    unittest.main()