[telequebec] Fix issues (closes #26368)
[youtube-dl] / youtube_dl / extractor / telequebec.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 from .common import InfoExtractor
5 from ..compat import compat_str
6 from ..utils import (
7     int_or_none,
8     smuggle_url,
9     try_get,
10     unified_timestamp,
11 )
12
13
class TeleQuebecBaseIE(InfoExtractor):
    """Base class holding the result builders shared by TeleQuebec extractors."""

    @staticmethod
    def _result(url, ie_key):
        """Build a ``url_transparent`` result that hands ``url`` to ``ie_key``.

        The URL is smuggled with ``geo_countries`` set to ``['CA']`` so the
        receiving extractor gets that hint along with the URL.
        """
        smuggled_url = smuggle_url(url, {'geo_countries': ['CA']})
        return {
            '_type': 'url_transparent',
            'url': smuggled_url,
            'ie_key': ie_key,
        }

    @staticmethod
    def _limelight_result(media_id):
        """Build a result that delegates ``media_id`` to ``LimelightMedia``."""
        limelight_url = 'limelight:media:' + media_id
        return TeleQuebecBaseIE._result(limelight_url, 'LimelightMedia')

    @staticmethod
    def _brightcove_result(brightcove_id):
        """Build a result that delegates ``brightcove_id`` to ``BrightcoveNew``."""
        player_url = (
            'http://players.brightcove.net/6150020952001/default_default/index.html?videoId=%s'
            % brightcove_id)
        return TeleQuebecBaseIE._result(player_url, 'BrightcoveNew')
33
34
class TeleQuebecIE(TeleQuebecBaseIE):
    """Extractor for zonevideo.telequebec.tv media and coucou.telequebec.tv videos.

    The numeric id from the URL is looked up in the mnmedias API and the
    actual extraction is delegated to the Brightcove or Limelight extractor.
    """

    _VALID_URL = r'''(?x)
                    https?://
                        (?:
                            zonevideo\.telequebec\.tv/media|
                            coucou\.telequebec\.tv/videos
                        )/(?P<id>\d+)
                    '''
    _TESTS = [{
        # available till 01.01.2023
        'url': 'http://zonevideo.telequebec.tv/media/37578/un-petit-choc-et-puis-repart/un-chef-a-la-cabane',
        'info_dict': {
            'id': '577116881b4b439084e6b1cf4ef8b1b3',
            'ext': 'mp4',
            'title': 'Un petit choc et puis repart!',
            'description': 'md5:067bc84bd6afecad85e69d1000730907',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://zonevideo.telequebec.tv/media/55267/le-soleil/passe-partout',
        'info_dict': {
            'id': '6167180337001',
            'ext': 'mp4',
            'title': 'Le soleil',
            'description': 'md5:64289c922a8de2abbe99c354daffde02',
            'uploader_id': '6150020952001',
            'upload_date': '20200625',
            'timestamp': 1593090307,
        },
        'params': {
            'format': 'bestvideo',
            'skip_download': True,
        },
        'add_ie': ['BrightcoveNew'],
    }, {
        # no description
        'url': 'http://zonevideo.telequebec.tv/media/30261',
        'only_matching': True,
    }, {
        'url': 'https://coucou.telequebec.tv/videos/41788/idee-de-genie/l-heure-du-bain',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Resolve the media id from ``url`` and return a url_transparent result."""
        media_id = self._match_id(url)

        api_url = 'https://mnmedias.api.telequebec.tv/api/v2/media/' + media_id
        media = self._download_json(api_url, media_id)['media']

        # The source id is mandatory; a missing key is allowed to raise.
        stream_source_id = media['streamInfo']['sourceId']

        # A missing or non-string source name falls back to Limelight.
        provider = try_get(
            media, lambda x: x['streamInfo']['source'], compat_str)
        provider = (provider or 'limelight').lower()

        if provider == 'brightcove':
            build_result = self._brightcove_result
        else:
            build_result = self._limelight_result
        result = build_result(stream_source_id)

        result['title'] = media.get('title')
        result['description'] = try_get(
            media, lambda x: x['descriptions'][0]['text'], compat_str)
        # presumably 1000 is the scale converting milliseconds to seconds;
        # verify against int_or_none's signature
        result['duration'] = int_or_none(
            media.get('durationInMilliseconds'), 1000)
        return result
103
104
class TeleQuebecSquatIE(InfoExtractor):
    """Extractor for squat.telequebec.tv video pages.

    The page id is resolved through the squat API to a ``sourceId``, and the
    result is handed to ``TeleQuebecIE`` through a zonevideo media URL.
    """

    # Accept plain http as well as https: only the numeric id is taken from
    # the URL and the metadata request below always goes to a fixed HTTPS
    # endpoint, so the scheme of the incoming link does not matter.
    _VALID_URL = r'https?://squat\.telequebec\.tv/videos/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://squat.telequebec.tv/videos/9314',
        'info_dict': {
            'id': 'd59ae78112d542e793d83cc9d3a5b530',
            'ext': 'mp4',
            'title': 'Poupeflekta',
            'description': 'md5:2f0718f8d2f8fece1646ee25fb7bce75',
            'duration': 1351,
            'timestamp': 1569057600,
            'upload_date': '20190921',
            'series': 'Miraculous : Les Aventures de Ladybug et Chat Noir',
            'season': 'Saison 3',
            'season_number': 3,
            'episode_number': 57,
        },
        'params': {
            'skip_download': True,
        },
    }]

    def _real_extract(self, url):
        """Return a url_transparent result for the squat video at ``url``.

        Raises ``KeyError`` if the API response has no ``sourceId``; failures
        of the JSON download itself are left to ``_download_json``.
        """
        video_id = self._match_id(url)

        video = self._download_json(
            'https://squat.api.telequebec.tv/v1/videos/%s' % video_id,
            video_id)

        # sourceId is the id that the zonevideo media URL is built from.
        media_id = video['sourceId']

        return {
            '_type': 'url_transparent',
            'url': 'http://zonevideo.telequebec.tv/media/%s' % media_id,
            'ie_key': TeleQuebecIE.ie_key(),
            'id': media_id,
            'title': video.get('titre'),
            'description': video.get('description'),
            'timestamp': unified_timestamp(video.get('datePublication')),
            'series': video.get('container'),
            'season': video.get('saison'),
            'season_number': int_or_none(video.get('noSaison')),
            'episode_number': int_or_none(video.get('episode')),
        }
149
150
class TeleQuebecEmissionIE(TeleQuebecBaseIE):
    """Extractor for telequebec.tv programme pages that embed a Limelight id."""

    _VALID_URL = r'''(?x)
                    https?://
                        (?:
                            [^/]+\.telequebec\.tv/emissions/|
                            (?:www\.)?telequebec\.tv/
                        )
                        (?P<id>[^?#&]+)
                    '''
    _TESTS = [{
        'url': 'http://lindicemcsween.telequebec.tv/emissions/100430013/des-soins-esthetiques-a-377-d-interets-annuels-ca-vous-tente',
        'info_dict': {
            'id': '66648a6aef914fe3badda25e81a4d50a',
            'ext': 'mp4',
            'title': "Des soins esthétiques à 377 % d'intérêts annuels, ça vous tente?",
            'description': 'md5:369e0d55d0083f1fc9b71ffb640ea014',
            'upload_date': '20171024',
            'timestamp': 1508862118,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'http://bancpublic.telequebec.tv/emissions/emission-49/31986/jeunes-meres-sous-pression',
        'only_matching': True,
    }, {
        'url': 'http://www.telequebec.tv/masha-et-michka/epi059masha-et-michka-3-053-078',
        'only_matching': True,
    }, {
        'url': 'http://www.telequebec.tv/documentaire/bebes-sur-mesure/',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Scrape the Limelight media id from the page and delegate to Limelight."""
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        # The page exposes the id as mediaUID: "Limelight_<32 chars of a-z0-9>".
        limelight_id = self._search_regex(
            r'mediaUID\s*:\s*["\'][Ll]imelight_(?P<id>[a-z0-9]{32})', webpage,
            'limelight id')

        result = self._limelight_result(limelight_id)
        # Open Graph metadata is optional; both lookups default to None.
        result['title'] = self._og_search_title(webpage, default=None)
        result['description'] = self._og_search_description(
            webpage, default=None)
        return result
199
200
class TeleQuebecLiveIE(InfoExtractor):
    """Extractor for the live stream page at zonevideo.telequebec.tv/endirect."""

    _VALID_URL = r'https?://zonevideo\.telequebec\.tv/(?P<id>endirect)'
    _TEST = {
        'url': 'http://zonevideo.telequebec.tv/endirect/',
        'info_dict': {
            'id': 'endirect',
            'ext': 'mp4',
            'title': 're:^Télé-Québec - En direct [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
            'is_live': True,
        },
        'params': {
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Return the live stream info dict with HLS formats."""
        video_id = self._match_id(url)

        # The player script is fetched non-fatally; a failed download simply
        # leads to the hard-coded manifest below.
        player_js = self._download_webpage(
            'https://player.telequebec.tv/Tq_VideoPlayer.js', video_id,
            fatal=False)
        manifest_url = self._search_regex(
            r'm3U8Url\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', player_js,
            'm3u8 url', default=None, group='url') if player_js else None

        # Fall back to the known manifest when the script yields no URL.
        manifest_url = manifest_url or 'https://teleqmmd.mmdlive.lldns.net/teleqmmd/f386e3b206814e1f8c8c1c71c0f8e748/manifest.m3u8'

        formats = self._extract_m3u8_formats(
            manifest_url, video_id, 'mp4', m3u8_id='hls')
        self._sort_formats(formats)

        live_title = self._live_title('Télé-Québec - En direct')
        return {
            'id': video_id,
            'title': live_title,
            'is_live': True,
            'formats': formats,
        }
237             'formats': formats,
238         }