[telequebec] Add support for coucou.telequebec.tv (#22482)
[youtube-dl] / youtube_dl / extractor / telequebec.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 from .common import InfoExtractor
5 from ..compat import compat_str
6 from ..utils import (
7     int_or_none,
8     smuggle_url,
9     try_get,
10 )
11
12
class TeleQuebecBaseIE(InfoExtractor):
    """Common base for the Tele-Quebec extractors that delegate to Limelight."""

    @staticmethod
    def _limelight_result(media_id):
        """Build a ``url_transparent`` result for a Limelight media id.

        The returned dict points at ``limelight:media:<media_id>`` and names
        ``LimelightMedia`` as the extractor key. A ``geo_countries`` value of
        ``['CA']`` is smuggled into the URL alongside the media id.

        :param media_id: Limelight media identifier (string).
        :return: a new dict with ``_type``, ``url`` and ``ie_key`` entries.
        """
        limelight_url = smuggle_url(
            'limelight:media:' + media_id, {'geo_countries': ['CA']})

        result = {'_type': 'url_transparent'}
        result['url'] = limelight_url
        result['ie_key'] = 'LimelightMedia'
        return result
22
23
class TeleQuebecIE(TeleQuebecBaseIE):
    """Extractor for numeric media pages on zonevideo/coucou.telequebec.tv."""

    # Matches /media/<id> on zonevideo and /videos/<id> on coucou; the
    # numeric id is captured as "id".
    _VALID_URL = r'''(?x)
                    https?://
                        (?:
                            zonevideo\.telequebec\.tv/media|
                            coucou\.telequebec\.tv/videos
                        )/(?P<id>\d+)
                    '''
    _TESTS = [{
        # available until 01.01.2023
        'url': 'http://zonevideo.telequebec.tv/media/37578/un-petit-choc-et-puis-repart/un-chef-a-la-cabane',
        'info_dict': {
            'id': '577116881b4b439084e6b1cf4ef8b1b3',
            'ext': 'mp4',
            'title': 'Un petit choc et puis repart!',
            'description': 'md5:b04a7e6b3f74e32d7b294cffe8658374',
            'upload_date': '20180222',
            'timestamp': 1519326631,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        # no description
        'url': 'http://zonevideo.telequebec.tv/media/30261',
        'only_matching': True,
    }, {
        'url': 'https://coucou.telequebec.tv/videos/41788/idee-de-genie/l-heure-du-bain',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Look up the media through the mnmedias API and hand off to Limelight.

        The ``media`` object of the API response supplies the Limelight
        source id (required; a missing key raises) plus the optional title,
        first description text and duration.
        """
        media_id = self._match_id(url)

        api_url = 'https://mnmedias.api.telequebec.tv/api/v2/media/' + media_id
        media_data = self._download_json(api_url, media_id)['media']

        info = self._limelight_result(media_data['streamInfo']['sourceId'])

        info['title'] = media_data.get('title')
        # The description is optional (see the "no description" test case),
        # so any lookup failure inside try_get simply yields None.
        info['description'] = try_get(
            media_data, lambda x: x['descriptions'][0]['text'], compat_str)
        # durationInMilliseconds is passed with 1000 as the second argument
        # of int_or_none -- presumably a milliseconds-to-seconds scale.
        info['duration'] = int_or_none(
            media_data.get('durationInMilliseconds'), 1000)
        return info
71
72
class TeleQuebecEmissionIE(TeleQuebecBaseIE):
    """Extractor for Tele-Quebec show pages that embed a Limelight media id."""

    # Matches <subdomain>.telequebec.tv/emissions/<path> as well as
    # (www.)telequebec.tv/<path>; the path (up to ?, # or &) is the "id".
    _VALID_URL = r'''(?x)
                    https?://
                        (?:
                            [^/]+\.telequebec\.tv/emissions/|
                            (?:www\.)?telequebec\.tv/
                        )
                        (?P<id>[^?#&]+)
                    '''
    _TESTS = [{
        'url': 'http://lindicemcsween.telequebec.tv/emissions/100430013/des-soins-esthetiques-a-377-d-interets-annuels-ca-vous-tente',
        'info_dict': {
            'id': '66648a6aef914fe3badda25e81a4d50a',
            'ext': 'mp4',
            'title': "Des soins esthétiques à 377 % d'intérêts annuels, ça vous tente?",
            'description': 'md5:369e0d55d0083f1fc9b71ffb640ea014',
            'upload_date': '20171024',
            'timestamp': 1508862118,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'http://bancpublic.telequebec.tv/emissions/emission-49/31986/jeunes-meres-sous-pression',
        'only_matching': True,
    }, {
        'url': 'http://www.telequebec.tv/masha-et-michka/epi059masha-et-michka-3-053-078',
        'only_matching': True,
    }, {
        'url': 'http://www.telequebec.tv/documentaire/bebes-sur-mesure/',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Scrape the page for its Limelight media id and hand off to Limelight.

        The id is taken from a ``mediaUID: "Limelight_<32 chars>"`` entry in
        the page source. Title and description come from the page's
        OpenGraph data and default to None when absent.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        limelight_id = self._search_regex(
            r'mediaUID\s*:\s*["\'][Ll]imelight_(?P<id>[a-z0-9]{32})', webpage,
            'limelight id')

        result = self._limelight_result(limelight_id)
        result['title'] = self._og_search_title(webpage, default=None)
        result['description'] = self._og_search_description(
            webpage, default=None)
        return result
121
122
class TeleQuebecLiveIE(InfoExtractor):
    """Extractor for the Tele-Quebec live stream page (zonevideo /endirect)."""

    _VALID_URL = r'https?://zonevideo\.telequebec\.tv/(?P<id>endirect)'
    _TEST = {
        'url': 'http://zonevideo.telequebec.tv/endirect/',
        'info_dict': {
            'id': 'endirect',
            'ext': 'mp4',
            'title': 're:^Télé-Québec - En direct [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
            'is_live': True,
        },
        'params': {
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Resolve the live HLS manifest and return the live-stream info dict.

        The manifest URL is scraped from the player script when that script
        can be downloaded and contains an ``m3U8Url`` entry; otherwise a
        hard-coded manifest URL is used.
        """
        video_id = self._match_id(url)

        # Non-fatal download: a failure must not abort extraction because a
        # fallback manifest URL is available below.
        player_js = self._download_webpage(
            'https://player.telequebec.tv/Tq_VideoPlayer.js', video_id,
            fatal=False)

        scraped_url = self._search_regex(
            r'm3U8Url\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', player_js,
            'm3u8 url', default=None, group='url') if player_js else None

        # Fall back to the known manifest when nothing usable was scraped.
        m3u8_url = scraped_url or 'https://teleqmmd.mmdlive.lldns.net/teleqmmd/f386e3b206814e1f8c8c1c71c0f8e748/manifest.m3u8'

        formats = self._extract_m3u8_formats(
            m3u8_url, video_id, 'mp4', m3u8_id='hls')
        self._sort_formats(formats)

        live_title = self._live_title('Télé-Québec - En direct')
        return {
            'id': video_id,
            'title': live_title,
            'is_live': True,
            'formats': formats,
        }
159             'formats': formats,
160         }