[pornhub] Extract metadata from JSON-LD (closes #26614)
[youtube-dl] / test / test_subtitles.py
1 #!/usr/bin/env python
2 from __future__ import unicode_literals
3
4 # Allow direct execution
5 import os
6 import sys
7 import unittest
8 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
9
10 from test.helper import FakeYDL, md5
11
12
13 from youtube_dl.extractor import (
14     YoutubeIE,
15     DailymotionIE,
16     TEDIE,
17     VimeoIE,
18     WallaIE,
19     CeskaTelevizeIE,
20     LyndaIE,
21     NPOIE,
22     ComedyCentralIE,
23     NRKTVIE,
24     RaiPlayIE,
25     VikiIE,
26     ThePlatformIE,
27     ThePlatformFeedIE,
28     RTVEALaCartaIE,
29     DemocracynowIE,
30 )
31
32
class BaseTestSubtitles(unittest.TestCase):
    """Common fixture for the per-extractor subtitle tests.

    Subclasses set ``url`` to the video to probe and ``IE`` to the info
    extractor class that should be registered with the downloader.
    """

    url = None
    IE = None

    def setUp(self):
        """Create a fresh FakeYDL and register an instance of ``IE`` with it."""
        self.DL = FakeYDL()
        self.ie = self.IE()
        self.DL.add_info_extractor(self.ie)

    def getInfoDict(self):
        """Return the info dict extracted for ``self.url`` without downloading."""
        return self.DL.extract_info(self.url, download=False)

    def getSubtitles(self):
        """Return a ``{language: subtitle text}`` dict for ``self.url``.

        When ``requested_subtitles`` is falsy it is returned unchanged.
        Tracks whose ``data`` is missing are fetched from their ``url`` and
        decoded as UTF-8; the fetched text is stored back on the track.
        """
        info_dict = self.getInfoDict()
        subtitles = info_dict['requested_subtitles']
        if not subtitles:
            return subtitles
        for sub_info in subtitles.values():
            if sub_info.get('data') is None:
                uf = self.DL.urlopen(sub_info['url'])
                try:
                    sub_info['data'] = uf.read().decode('utf-8')
                finally:
                    # Always release the response, even if reading or
                    # decoding fails, so no connection is left dangling.
                    uf.close()
        return dict((lang, sub_info['data']) for lang, sub_info in subtitles.items())
56
57
class TestYoutubeSubtitles(BaseTestSubtitles):
    """Subtitle and automatic-caption checks run through YoutubeIE."""

    url = 'QRS8MkLhQmM'
    IE = YoutubeIE

    def test_youtube_allsubtitles(self):
        # Request every track; pin the track count and two checksums.
        for flag in ('writesubtitles', 'allsubtitles'):
            self.DL.params[flag] = True
        tracks = self.getSubtitles()
        lang_codes = tracks.keys()
        self.assertEqual(len(lang_codes), 13)
        self.assertEqual(md5(tracks['en']), '3cb210999d3e021bd6c7f0ea751eab06')
        self.assertEqual(md5(tracks['it']), '6d752b98c31f1cf8d597050c7a2cb4b5')
        for lang in ['fr', 'de']:
            track = tracks.get(lang)
            self.assertTrue(track is not None, 'Subtitles for \'%s\' not extracted' % lang)

    def test_youtube_subtitles_ttml_format(self):
        # Ask for the 'ttml' format and pin the English checksum.
        for key, value in (('writesubtitles', True), ('subtitlesformat', 'ttml')):
            self.DL.params[key] = value
        english = self.getSubtitles()['en']
        self.assertEqual(md5(english), 'e306f8c42842f723447d9f63ad65df54')

    def test_youtube_subtitles_vtt_format(self):
        # Ask for the 'vtt' format and pin the English checksum.
        for key, value in (('writesubtitles', True), ('subtitlesformat', 'vtt')):
            self.DL.params[key] = value
        english = self.getSubtitles()['en']
        self.assertEqual(md5(english), '3cb210999d3e021bd6c7f0ea751eab06')

    def test_youtube_automatic_captions(self):
        # Automatic captions restricted to Italian must produce an 'it' entry.
        self.url = '8YoUxe5ncPo'
        for key, value in (('writeautomaticsub', True), ('subtitleslangs', ['it'])):
            self.DL.params[key] = value
        italian = self.getSubtitles()['it']
        self.assertTrue(italian is not None)

    def test_youtube_translated_subtitles(self):
        # This video has a subtitles track, which can be translated
        self.url = 'Ky9eprVWzlI'
        for key, value in (('writeautomaticsub', True), ('subtitleslangs', ['it'])):
            self.DL.params[key] = value
        italian = self.getSubtitles()['it']
        self.assertTrue(italian is not None)

    def test_youtube_nosubtitles(self):
        # A video without subtitles must give a falsy result (warning expected).
        self.DL.expect_warning('video doesn\'t have subtitles')
        self.url = 'n5BB19UTcdA'
        for flag in ('writesubtitles', 'allsubtitles'):
            self.DL.params[flag] = True
        self.assertFalse(self.getSubtitles())
106
107
class TestDailymotionSubtitles(BaseTestSubtitles):
    """Subtitle checks run through DailymotionIE."""

    url = 'http://www.dailymotion.com/video/xczg00'
    IE = DailymotionIE

    def test_allsubtitles(self):
        # At least six tracks expected; English and French pinned by checksum.
        for flag in ('writesubtitles', 'allsubtitles'):
            self.DL.params[flag] = True
        tracks = self.getSubtitles()
        lang_codes = tracks.keys()
        self.assertTrue(len(lang_codes) >= 6)
        self.assertEqual(md5(tracks['en']), '976553874490cba125086bbfea3ff76f')
        self.assertEqual(md5(tracks['fr']), '594564ec7d588942e384e920e5341792')
        for lang in ['es', 'fr', 'de']:
            track = tracks.get(lang)
            self.assertTrue(track is not None, 'Subtitles for \'%s\' not extracted' % lang)

    def test_nosubtitles(self):
        # A video without subtitles must give a falsy result (warning expected).
        self.DL.expect_warning('video doesn\'t have subtitles')
        self.url = 'http://www.dailymotion.com/video/x12u166_le-zapping-tele-star-du-08-aout-2013_tv'
        for flag in ('writesubtitles', 'allsubtitles'):
            self.DL.params[flag] = True
        self.assertFalse(self.getSubtitles())
129
130
class TestTedSubtitles(BaseTestSubtitles):
    """Subtitle checks run through TEDIE."""

    url = 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html'
    IE = TEDIE

    def test_allsubtitles(self):
        # At least 28 tracks expected; English and French pinned by checksum.
        for flag in ('writesubtitles', 'allsubtitles'):
            self.DL.params[flag] = True
        tracks = self.getSubtitles()
        lang_codes = tracks.keys()
        self.assertTrue(len(lang_codes) >= 28)
        self.assertEqual(md5(tracks['en']), '4262c1665ff928a2dada178f62cb8d14')
        self.assertEqual(md5(tracks['fr']), '66a63f7f42c97a50f8c0e90bc7797bb5')
        for lang in ['es', 'fr', 'de']:
            track = tracks.get(lang)
            self.assertTrue(track is not None, 'Subtitles for \'%s\' not extracted' % lang)
144
145
class TestVimeoSubtitles(BaseTestSubtitles):
    """Subtitle checks run through VimeoIE."""

    url = 'http://vimeo.com/76979871'
    IE = VimeoIE

    def test_allsubtitles(self):
        # Exactly four languages expected; English and French pinned by checksum.
        for flag in ('writesubtitles', 'allsubtitles'):
            self.DL.params[flag] = True
        tracks = self.getSubtitles()
        found_langs = set(tracks.keys())
        self.assertEqual(found_langs, set(['de', 'en', 'es', 'fr']))
        self.assertEqual(md5(tracks['en']), '8062383cf4dec168fc40a088aa6d5888')
        self.assertEqual(md5(tracks['fr']), 'b6191146a6c5d3a452244d853fde6dc8')

    def test_nosubtitles(self):
        # A video without subtitles must give a falsy result (warning expected).
        self.DL.expect_warning('video doesn\'t have subtitles')
        self.url = 'http://vimeo.com/56015672'
        for flag in ('writesubtitles', 'allsubtitles'):
            self.DL.params[flag] = True
        self.assertFalse(self.getSubtitles())
165
166
class TestWallaSubtitles(BaseTestSubtitles):
    """Subtitle checks run through WallaIE."""

    url = 'http://vod.walla.co.il/movie/2705958/the-yes-men'
    IE = WallaIE

    def test_allsubtitles(self):
        # Only a 'heb' track is expected; its content is pinned by checksum.
        self.DL.expect_warning('Automatic Captions not supported by this server')
        for flag in ('writesubtitles', 'allsubtitles'):
            self.DL.params[flag] = True
        tracks = self.getSubtitles()
        found_langs = set(tracks.keys())
        self.assertEqual(found_langs, set(['heb']))
        self.assertEqual(md5(tracks['heb']), 'e758c5d7cb982f6bef14f377ec7a3920')

    def test_nosubtitles(self):
        # A video without subtitles must give a falsy result (warning expected).
        self.DL.expect_warning('video doesn\'t have subtitles')
        self.url = 'http://vod.walla.co.il/movie/2642630/one-direction-all-for-one'
        for flag in ('writesubtitles', 'allsubtitles'):
            self.DL.params[flag] = True
        self.assertFalse(self.getSubtitles())
186
187
class TestCeskaTelevizeSubtitles(BaseTestSubtitles):
    """Subtitle checks run through CeskaTelevizeIE."""

    url = 'http://www.ceskatelevize.cz/ivysilani/10600540290-u6-uzasny-svet-techniky'
    IE = CeskaTelevizeIE

    def test_allsubtitles(self):
        # Only a 'cs' track is expected, and it must be longer than 20000.
        self.DL.expect_warning('Automatic Captions not supported by this server')
        for flag in ('writesubtitles', 'allsubtitles'):
            self.DL.params[flag] = True
        tracks = self.getSubtitles()
        found_langs = set(tracks.keys())
        self.assertEqual(found_langs, set(['cs']))
        czech = tracks['cs']
        self.assertTrue(len(czech) > 20000)

    def test_nosubtitles(self):
        # A video without subtitles must give a falsy result (warning expected).
        self.DL.expect_warning('video doesn\'t have subtitles')
        self.url = 'http://www.ceskatelevize.cz/ivysilani/ivysilani/10441294653-hyde-park-civilizace/214411058091220'
        for flag in ('writesubtitles', 'allsubtitles'):
            self.DL.params[flag] = True
        self.assertFalse(self.getSubtitles())
207
208
class TestLyndaSubtitles(BaseTestSubtitles):
    """Subtitle checks run through LyndaIE."""

    url = 'http://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html'
    IE = LyndaIE

    def test_allsubtitles(self):
        # Only an 'en' track is expected; its content is pinned by checksum.
        for flag in ('writesubtitles', 'allsubtitles'):
            self.DL.params[flag] = True
        tracks = self.getSubtitles()
        found_langs = set(tracks.keys())
        self.assertEqual(found_langs, set(['en']))
        self.assertEqual(md5(tracks['en']), '09bbe67222259bed60deaa26997d73a7')
219
220
class TestNPOSubtitles(BaseTestSubtitles):
    """Subtitle checks run through NPOIE."""

    url = 'http://www.npo.nl/nos-journaal/28-08-2014/POW_00722860'
    IE = NPOIE

    def test_allsubtitles(self):
        # Only an 'nl' track is expected; its content is pinned by checksum.
        for flag in ('writesubtitles', 'allsubtitles'):
            self.DL.params[flag] = True
        tracks = self.getSubtitles()
        found_langs = set(tracks.keys())
        self.assertEqual(found_langs, set(['nl']))
        self.assertEqual(md5(tracks['nl']), 'fc6435027572b63fb4ab143abd5ad3f4')
231
232
class TestMTVSubtitles(BaseTestSubtitles):
    """Subtitle checks run through ComedyCentralIE."""

    url = 'http://www.cc.com/video-clips/p63lk0/adam-devine-s-house-party-chasing-white-swans'
    IE = ComedyCentralIE

    def getInfoDict(self):
        """Return the first item of the ``entries`` list in the base info dict."""
        container = super(TestMTVSubtitles, self).getInfoDict()
        return container['entries'][0]

    def test_allsubtitles(self):
        # Only an 'en' track is expected; its content is pinned by checksum.
        for flag in ('writesubtitles', 'allsubtitles'):
            self.DL.params[flag] = True
        tracks = self.getSubtitles()
        found_langs = set(tracks.keys())
        self.assertEqual(found_langs, set(['en']))
        self.assertEqual(md5(tracks['en']), '78206b8d8a0cfa9da64dc026eea48961')
246
247
class TestNRKSubtitles(BaseTestSubtitles):
    """Subtitle checks run through NRKTVIE."""

    url = 'http://tv.nrk.no/serie/ikke-gjoer-dette-hjemme/DMPV73000411/sesong-2/episode-1'
    IE = NRKTVIE

    def test_allsubtitles(self):
        # Only a 'no' track is expected; its content is pinned by checksum.
        for flag in ('writesubtitles', 'allsubtitles'):
            self.DL.params[flag] = True
        tracks = self.getSubtitles()
        found_langs = set(tracks.keys())
        self.assertEqual(found_langs, set(['no']))
        self.assertEqual(md5(tracks['no']), '544fa917d3197fcbee64634559221cc2')
258
259
class TestRaiPlaySubtitles(BaseTestSubtitles):
    """Subtitle checks run through RaiPlayIE."""

    url = 'http://www.raiplay.it/video/2014/04/Report-del-07042014-cb27157f-9dd0-4aee-b788-b1f67643a391.html'
    IE = RaiPlayIE

    def test_allsubtitles(self):
        # Only an 'it' track is expected; its content is pinned by checksum.
        for flag in ('writesubtitles', 'allsubtitles'):
            self.DL.params[flag] = True
        tracks = self.getSubtitles()
        found_langs = set(tracks.keys())
        self.assertEqual(found_langs, set(['it']))
        self.assertEqual(md5(tracks['it']), 'b1d90a98755126b61e667567a1f6680a')
270
271
class TestVikiSubtitles(BaseTestSubtitles):
    """Subtitle checks run through VikiIE."""

    url = 'http://www.viki.com/videos/1060846v-punch-episode-18'
    IE = VikiIE

    def test_allsubtitles(self):
        # Only an 'en' track is expected; its content is pinned by checksum.
        for flag in ('writesubtitles', 'allsubtitles'):
            self.DL.params[flag] = True
        tracks = self.getSubtitles()
        found_langs = set(tracks.keys())
        self.assertEqual(found_langs, set(['en']))
        self.assertEqual(md5(tracks['en']), '53cb083a5914b2d84ef1ab67b880d18a')
282
283
class TestThePlatformSubtitles(BaseTestSubtitles):
    """Subtitle checks run through ThePlatformIE."""

    # from http://www.3playmedia.com/services-features/tools/integrations/theplatform/
    # (see http://theplatform.com/about/partners/type/subtitles-closed-captioning/)
    url = 'theplatform:JFUjUE1_ehvq'
    IE = ThePlatformIE

    def test_allsubtitles(self):
        # Only an 'en' track is expected; its content is pinned by checksum.
        for flag in ('writesubtitles', 'allsubtitles'):
            self.DL.params[flag] = True
        tracks = self.getSubtitles()
        found_langs = set(tracks.keys())
        self.assertEqual(found_langs, set(['en']))
        self.assertEqual(md5(tracks['en']), '97e7670cbae3c4d26ae8bcc7fdd78d4b')
296
297
class TestThePlatformFeedSubtitles(BaseTestSubtitles):
    """Subtitle checks run through ThePlatformFeedIE."""

    url = 'http://feed.theplatform.com/f/7wvmTC/msnbc_video-p-test?form=json&pretty=true&range=-40&byGuid=n_hardball_5biden_140207'
    IE = ThePlatformFeedIE

    def test_allsubtitles(self):
        # Only an 'en' track is expected; its content is pinned by checksum.
        for flag in ('writesubtitles', 'allsubtitles'):
            self.DL.params[flag] = True
        tracks = self.getSubtitles()
        found_langs = set(tracks.keys())
        self.assertEqual(found_langs, set(['en']))
        self.assertEqual(md5(tracks['en']), '48649a22e82b2da21c9a67a395eedade')
308
309
class TestRtveSubtitles(BaseTestSubtitles):
    """Subtitle checks run through RTVEALaCartaIE (currently skipped)."""

    url = 'http://www.rtve.es/alacarta/videos/los-misterios-de-laura/misterios-laura-capitulo-32-misterio-del-numero-17-2-parte/2428621/'
    IE = RTVEALaCartaIE

    # Mark the test as skipped instead of printing a note and returning early:
    # an early return is reported as a pass and leaves the assertions below
    # unreachable. With the decorator they stay live for when it is re-enabled.
    @unittest.skip('Skipping, only available from Spain')
    def test_allsubtitles(self):
        # Only an 'es' track is expected; its content is pinned by checksum.
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertEqual(set(subtitles.keys()), set(['es']))
        self.assertEqual(md5(subtitles['es']), '69e70cae2d40574fb7316f31d6eb7fca')
322
323
class TestDemocracynowSubtitles(BaseTestSubtitles):
    """Subtitle checks run through DemocracynowIE."""

    url = 'http://www.democracynow.org/shows/2015/7/3'
    IE = DemocracynowIE

    def test_allsubtitles(self):
        # Only an 'en' track is expected; its content is pinned by checksum.
        for flag in ('writesubtitles', 'allsubtitles'):
            self.DL.params[flag] = True
        tracks = self.getSubtitles()
        found_langs = set(tracks.keys())
        self.assertEqual(found_langs, set(['en']))
        self.assertEqual(md5(tracks['en']), 'acaca989e24a9e45a6719c9b3d60815c')

    def test_subtitles_in_page(self):
        # Same expectations as above, but starting from a different page URL.
        self.url = 'http://www.democracynow.org/2015/7/3/this_flag_comes_down_today_bree'
        for flag in ('writesubtitles', 'allsubtitles'):
            self.DL.params[flag] = True
        tracks = self.getSubtitles()
        found_langs = set(tracks.keys())
        self.assertEqual(found_langs, set(['en']))
        self.assertEqual(md5(tracks['en']), 'acaca989e24a9e45a6719c9b3d60815c')
342
343
# Run the test cases in this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()