Merge branch 'sohu_fix' of https://github.com/yan12125/youtube-dl into yan12125-sohu_fix
[youtube-dl] / test / test_subtitles.py
1 #!/usr/bin/env python
2 from __future__ import unicode_literals
3
4 # Allow direct execution
5 import os
6 import sys
7 import unittest
8 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
9
10 from test.helper import FakeYDL, md5
11
12
13 from youtube_dl.extractor import (
14     BlipTVIE,
15     YoutubeIE,
16     DailymotionIE,
17     TEDIE,
18     VimeoIE,
19     WallaIE,
20     CeskaTelevizeIE,
21     LyndaIE,
22     NPOIE,
23     ComedyCentralIE,
24     NRKTVIE,
25     RaiIE,
26     VikiIE,
27     ThePlatformIE,
28     RTVEALaCartaIE,
29     FunnyOrDieIE,
30 )
31
32
class BaseTestSubtitles(unittest.TestCase):
    """Shared fixture and helpers for the per-extractor subtitle tests.

    Subclasses set ``url`` (the video to extract) and ``IE`` (the info
    extractor class to instantiate); both are ``None`` on this base class.
    """

    url = None
    IE = None

    def setUp(self):
        # unittest calls setUp before every test method, so each test gets
        # its own FakeYDL and its own extractor instance.
        self.DL = FakeYDL()
        self.ie = self.IE()
        self.DL.add_info_extractor(self.ie)

    def getInfoDict(self):
        """Return the result of extracting ``self.url`` without downloading."""
        return self.DL.extract_info(self.url, download=False)

    def getSubtitles(self):
        """Return a ``{language: subtitle text}`` dict for ``self.url``.

        When the info dict's ``'requested_subtitles'`` value is falsy it is
        returned unchanged. Entries whose ``'data'`` is missing or ``None``
        are fetched from their ``'url'`` through ``self.DL.urlopen`` and
        decoded as UTF-8; the fetched text is also stored back on the entry.
        """
        info_dict = self.getInfoDict()
        subtitles = info_dict['requested_subtitles']
        if not subtitles:
            return subtitles
        for sub_info in subtitles.values():
            if sub_info.get('data') is None:
                uf = self.DL.urlopen(sub_info['url'])
                try:
                    sub_info['data'] = uf.read().decode('utf-8')
                finally:
                    # Always release the response, even when reading or
                    # decoding raises.
                    uf.close()
        return dict(
            (lang, sub_info['data']) for lang, sub_info in subtitles.items())
56
57
class TestYoutubeSubtitles(BaseTestSubtitles):
    """Subtitle tests for YoutubeIE; the default video id is 'QRS8MkLhQmM'."""

    url = 'QRS8MkLhQmM'
    IE = YoutubeIE

    def test_youtube_allsubtitles(self):
        # Request every available subtitle track of the default video.
        self.DL.params.update({'writesubtitles': True, 'allsubtitles': True})
        subs = self.getSubtitles()
        languages = subs.keys()
        self.assertEqual(len(languages), 13)
        expected_digests = (
            ('en', '4cd9278a35ba2305f47354ee13472260'),
            ('it', '164a51f16f260476a05b50fe4c2f161d'),
        )
        for code, digest in expected_digests:
            self.assertEqual(md5(subs[code]), digest)
        for code in ('it', 'fr', 'de'):
            self.assertTrue(
                subs.get(code) is not None,
                "Subtitles for '%s' not extracted" % code)

    def test_youtube_subtitles_sbv_format(self):
        # Same video, subtitles requested in the 'sbv' format.
        self.DL.params.update(
            {'writesubtitles': True, 'subtitlesformat': 'sbv'})
        english = self.getSubtitles()['en']
        self.assertEqual(md5(english), '13aeaa0c245a8bed9a451cb643e3ad8b')

    def test_youtube_subtitles_vtt_format(self):
        # Same video, subtitles requested in the 'vtt' format.
        self.DL.params.update(
            {'writesubtitles': True, 'subtitlesformat': 'vtt'})
        english = self.getSubtitles()['en']
        self.assertEqual(md5(english), '3cb210999d3e021bd6c7f0ea751eab06')

    def test_youtube_automatic_captions(self):
        # Uses a different video and asks for automatic captions in Italian.
        self.url = '8YoUxe5ncPo'
        self.DL.params.update(
            {'writeautomaticsub': True, 'subtitleslangs': ['it']})
        italian = self.getSubtitles()['it']
        self.assertTrue(italian is not None)

    def test_youtube_translated_subtitles(self):
        # This video has a subtitles track, which can be translated
        self.url = 'Ky9eprVWzlI'
        self.DL.params.update(
            {'writeautomaticsub': True, 'subtitleslangs': ['it']})
        italian = self.getSubtitles()['it']
        self.assertTrue(italian is not None)

    def test_youtube_nosubtitles(self):
        # A video without subtitles: a warning is expected and the result
        # must be falsy.
        self.DL.expect_warning("video doesn't have subtitles")
        self.url = 'n5BB19UTcdA'
        self.DL.params.update({'writesubtitles': True, 'allsubtitles': True})
        self.assertFalse(self.getSubtitles())
106
107
class TestDailymotionSubtitles(BaseTestSubtitles):
    """Subtitle tests for DailymotionIE."""

    url = 'http://www.dailymotion.com/video/xczg00'
    IE = DailymotionIE

    def test_allsubtitles(self):
        # Request every available subtitle track of the default video.
        self.DL.params.update({'writesubtitles': True, 'allsubtitles': True})
        subs = self.getSubtitles()
        language_count = len(subs.keys())
        self.assertTrue(language_count >= 6)
        expected_digests = (
            ('en', '976553874490cba125086bbfea3ff76f'),
            ('fr', '594564ec7d588942e384e920e5341792'),
        )
        for code, digest in expected_digests:
            self.assertEqual(md5(subs[code]), digest)
        for code in ('es', 'fr', 'de'):
            self.assertTrue(
                subs.get(code) is not None,
                "Subtitles for '%s' not extracted" % code)

    def test_nosubtitles(self):
        # A video without subtitles: a warning is expected and the result
        # must be falsy.
        self.DL.expect_warning("video doesn't have subtitles")
        self.url = 'http://www.dailymotion.com/video/x12u166_le-zapping-tele-star-du-08-aout-2013_tv'
        self.DL.params.update({'writesubtitles': True, 'allsubtitles': True})
        self.assertFalse(self.getSubtitles())
129
130
class TestTedSubtitles(BaseTestSubtitles):
    """Subtitle tests for TEDIE."""

    url = 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html'
    IE = TEDIE

    def test_allsubtitles(self):
        # Request every available subtitle track of the talk.
        self.DL.params.update({'writesubtitles': True, 'allsubtitles': True})
        subs = self.getSubtitles()
        language_count = len(subs.keys())
        self.assertTrue(language_count >= 28)
        expected_digests = (
            ('en', '4262c1665ff928a2dada178f62cb8d14'),
            ('fr', '66a63f7f42c97a50f8c0e90bc7797bb5'),
        )
        for code, digest in expected_digests:
            self.assertEqual(md5(subs[code]), digest)
        for code in ('es', 'fr', 'de'):
            self.assertTrue(
                subs.get(code) is not None,
                "Subtitles for '%s' not extracted" % code)
144
145
class TestBlipTVSubtitles(BaseTestSubtitles):
    """Subtitle tests for BlipTVIE."""

    url = 'http://blip.tv/a/a-6603250'
    IE = BlipTVIE

    def test_allsubtitles(self):
        # Only an English track is expected.
        self.DL.params.update({'writesubtitles': True, 'allsubtitles': True})
        subs = self.getSubtitles()
        languages = set(subs.keys())
        self.assertEqual(languages, set(['en']))
        english_digest = md5(subs['en'])
        self.assertEqual(english_digest, '5b75c300af65fe4476dff79478bb93e4')
156
157
class TestVimeoSubtitles(BaseTestSubtitles):
    """Subtitle tests for VimeoIE."""

    url = 'http://vimeo.com/76979871'
    IE = VimeoIE

    def test_allsubtitles(self):
        # Exactly the German, English, Spanish and French tracks are expected.
        self.DL.params.update({'writesubtitles': True, 'allsubtitles': True})
        subs = self.getSubtitles()
        languages = set(subs.keys())
        self.assertEqual(languages, set(['de', 'en', 'es', 'fr']))
        expected_digests = (
            ('en', '8062383cf4dec168fc40a088aa6d5888'),
            ('fr', 'b6191146a6c5d3a452244d853fde6dc8'),
        )
        for code, digest in expected_digests:
            self.assertEqual(md5(subs[code]), digest)

    def test_nosubtitles(self):
        # A video without subtitles: a warning is expected and the result
        # must be falsy.
        self.DL.expect_warning("video doesn't have subtitles")
        self.url = 'http://vimeo.com/56015672'
        self.DL.params.update({'writesubtitles': True, 'allsubtitles': True})
        self.assertFalse(self.getSubtitles())
177
178
class TestWallaSubtitles(BaseTestSubtitles):
    """Subtitle tests for WallaIE."""

    url = 'http://vod.walla.co.il/movie/2705958/the-yes-men'
    IE = WallaIE

    def test_allsubtitles(self):
        # Only a 'heb' track is expected; the automatic-captions warning is
        # registered as expected before extraction.
        self.DL.expect_warning('Automatic Captions not supported by this server')
        self.DL.params.update({'writesubtitles': True, 'allsubtitles': True})
        subs = self.getSubtitles()
        languages = set(subs.keys())
        self.assertEqual(languages, set(['heb']))
        hebrew_digest = md5(subs['heb'])
        self.assertEqual(hebrew_digest, 'e758c5d7cb982f6bef14f377ec7a3920')

    def test_nosubtitles(self):
        # A video without subtitles: a warning is expected and the result
        # must be falsy.
        self.DL.expect_warning("video doesn't have subtitles")
        self.url = 'http://vod.walla.co.il/movie/2642630/one-direction-all-for-one'
        self.DL.params.update({'writesubtitles': True, 'allsubtitles': True})
        self.assertFalse(self.getSubtitles())
198
199
class TestCeskaTelevizeSubtitles(BaseTestSubtitles):
    """Subtitle tests for CeskaTelevizeIE."""

    url = 'http://www.ceskatelevize.cz/ivysilani/10600540290-u6-uzasny-svet-techniky'
    IE = CeskaTelevizeIE

    def test_allsubtitles(self):
        # Only a Czech track is expected; its text is checked by length
        # rather than by digest.
        self.DL.expect_warning('Automatic Captions not supported by this server')
        self.DL.params.update({'writesubtitles': True, 'allsubtitles': True})
        subs = self.getSubtitles()
        languages = set(subs.keys())
        self.assertEqual(languages, set(['cs']))
        czech_length = len(subs['cs'])
        self.assertTrue(czech_length > 20000)

    def test_nosubtitles(self):
        # A video without subtitles: a warning is expected and the result
        # must be falsy.
        self.DL.expect_warning("video doesn't have subtitles")
        self.url = 'http://www.ceskatelevize.cz/ivysilani/ivysilani/10441294653-hyde-park-civilizace/214411058091220'
        self.DL.params.update({'writesubtitles': True, 'allsubtitles': True})
        self.assertFalse(self.getSubtitles())
219
220
class TestLyndaSubtitles(BaseTestSubtitles):
    """Subtitle tests for LyndaIE."""

    url = 'http://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html'
    IE = LyndaIE

    def test_allsubtitles(self):
        # Only an English track is expected.
        self.DL.params.update({'writesubtitles': True, 'allsubtitles': True})
        subs = self.getSubtitles()
        languages = set(subs.keys())
        self.assertEqual(languages, set(['en']))
        english_digest = md5(subs['en'])
        self.assertEqual(english_digest, '09bbe67222259bed60deaa26997d73a7')
231
232
class TestNPOSubtitles(BaseTestSubtitles):
    """Subtitle tests for NPOIE."""

    url = 'http://www.npo.nl/nos-journaal/28-08-2014/POW_00722860'
    IE = NPOIE

    def test_allsubtitles(self):
        # Only a Dutch track is expected.
        self.DL.params.update({'writesubtitles': True, 'allsubtitles': True})
        subs = self.getSubtitles()
        languages = set(subs.keys())
        self.assertEqual(languages, set(['nl']))
        dutch_digest = md5(subs['nl'])
        self.assertEqual(dutch_digest, 'fc6435027572b63fb4ab143abd5ad3f4')
243
244
class TestMTVSubtitles(BaseTestSubtitles):
    """Subtitle tests run through ComedyCentralIE."""

    url = 'http://www.cc.com/video-clips/kllhuv/stand-up-greg-fitzsimmons--uncensored---too-good-of-a-mother'
    IE = ComedyCentralIE

    def getInfoDict(self):
        """Return the first item of the base result's ``'entries'`` list."""
        result = super(TestMTVSubtitles, self).getInfoDict()
        return result['entries'][0]

    def test_allsubtitles(self):
        # Only an English track is expected.
        self.DL.params.update({'writesubtitles': True, 'allsubtitles': True})
        subs = self.getSubtitles()
        languages = set(subs.keys())
        self.assertEqual(languages, set(['en']))
        english_digest = md5(subs['en'])
        self.assertEqual(english_digest, 'b9f6ca22a6acf597ec76f61749765e65')
258
259
class TestNRKSubtitles(BaseTestSubtitles):
    """Subtitle tests for NRKTVIE."""

    url = 'http://tv.nrk.no/serie/ikke-gjoer-dette-hjemme/DMPV73000411/sesong-2/episode-1'
    IE = NRKTVIE

    def test_allsubtitles(self):
        # Only a 'no' track is expected.
        self.DL.params.update({'writesubtitles': True, 'allsubtitles': True})
        subs = self.getSubtitles()
        languages = set(subs.keys())
        self.assertEqual(languages, set(['no']))
        norwegian_digest = md5(subs['no'])
        self.assertEqual(norwegian_digest, '1d221e6458c95c5494dcd38e6a1f129a')
270
271
class TestRaiSubtitles(BaseTestSubtitles):
    """Subtitle tests for RaiIE."""

    url = 'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-cb27157f-9dd0-4aee-b788-b1f67643a391.html'
    IE = RaiIE

    def test_allsubtitles(self):
        # Only an Italian track is expected.
        self.DL.params.update({'writesubtitles': True, 'allsubtitles': True})
        subs = self.getSubtitles()
        languages = set(subs.keys())
        self.assertEqual(languages, set(['it']))
        italian_digest = md5(subs['it'])
        self.assertEqual(italian_digest, 'b1d90a98755126b61e667567a1f6680a')
282
283
class TestVikiSubtitles(BaseTestSubtitles):
    """Subtitle tests for VikiIE."""

    url = 'http://www.viki.com/videos/1060846v-punch-episode-18'
    IE = VikiIE

    def test_allsubtitles(self):
        # Only an English track is expected.
        self.DL.params.update({'writesubtitles': True, 'allsubtitles': True})
        subs = self.getSubtitles()
        languages = set(subs.keys())
        self.assertEqual(languages, set(['en']))
        english_digest = md5(subs['en'])
        self.assertEqual(english_digest, '53cb083a5914b2d84ef1ab67b880d18a')
294
295
class TestThePlatformSubtitles(BaseTestSubtitles):
    """Subtitle tests for ThePlatformIE."""

    # from http://www.3playmedia.com/services-features/tools/integrations/theplatform/
    # (see http://theplatform.com/about/partners/type/subtitles-closed-captioning/)
    url = 'theplatform:JFUjUE1_ehvq'
    IE = ThePlatformIE

    def test_allsubtitles(self):
        # Only an English track is expected.
        self.DL.params.update({'writesubtitles': True, 'allsubtitles': True})
        subs = self.getSubtitles()
        languages = set(subs.keys())
        self.assertEqual(languages, set(['en']))
        english_digest = md5(subs['en'])
        self.assertEqual(english_digest, '97e7670cbae3c4d26ae8bcc7fdd78d4b')
308
309
class TestRtveSubtitles(BaseTestSubtitles):
    """Subtitle tests for RTVEALaCartaIE."""

    url = 'http://www.rtve.es/alacarta/videos/los-misterios-de-laura/misterios-laura-capitulo-32-misterio-del-numero-17-2-parte/2428621/'
    IE = RTVEALaCartaIE

    # The video is only available from Spain. Marking the test as skipped
    # makes the runner report it as such; printing a message and returning
    # early would count as a pass and leave the assertions unreachable.
    # Note: unittest.skip requires Python 2.7 or newer.
    @unittest.skip('only available from Spain')
    def test_allsubtitles(self):
        # Only a Spanish track is expected.
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertEqual(set(subtitles.keys()), set(['es']))
        self.assertEqual(md5(subtitles['es']), '69e70cae2d40574fb7316f31d6eb7fca')
322
323
class TestFunnyOrDieSubtitles(BaseTestSubtitles):
    """Subtitle tests for FunnyOrDieIE."""

    url = 'http://www.funnyordie.com/videos/224829ff6d/judd-apatow-will-direct-your-vine'
    IE = FunnyOrDieIE

    def test_allsubtitles(self):
        # Only an English track is expected.
        self.DL.params.update({'writesubtitles': True, 'allsubtitles': True})
        subs = self.getSubtitles()
        languages = set(subs.keys())
        self.assertEqual(languages, set(['en']))
        english_digest = md5(subs['en'])
        self.assertEqual(english_digest, 'c5593c193eacd353596c11c2d4f9ecc4')
334
335
# Run every test case in this module when the file is executed directly.
if __name__ == '__main__':
    unittest.main()