[zattoo] Add support for more zattoo platform sites
[youtube-dl] / youtube_dl / extractor / zattoo.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import re
5 from uuid import uuid4
6
7 from .common import InfoExtractor
8 from ..compat import (
9     compat_HTTPError,
10     compat_str,
11 )
12 from ..utils import (
13     ExtractorError,
14     int_or_none,
15     try_get,
16     url_or_none,
17     urlencode_postdata,
18 )
19
20
class ZattooPlatformBaseIE(InfoExtractor):
    """Common login, session and format extraction for Zattoo platform sites.

    Concrete subclasses are expected to provide ``_HOST`` (used to build
    every request URL) and ``_NETRC_MACHINE`` (used in the login-required
    message).
    """

    # Filled in by _login() from the login response; later embedded in the
    # cached channel list and program details URLs.
    _power_guide_hash = None

    def _host_url(self):
        """Return the HTTPS base URL of the site, without a trailing slash."""
        return 'https://%s' % self._HOST

    def _login(self):
        """Log in with the configured credentials and store the guide hash.

        Calls raise_login_required() when the username or the password is
        missing. An HTTP 400 answer from the login endpoint is reported as
        an expected ExtractorError about wrong credentials; any other
        ExtractorError is re-raised unchanged.
        """
        username, password = self._get_login_info()
        if not username or not password:
            self.raise_login_required(
                'A valid %s account is needed to access this media.'
                % self._NETRC_MACHINE)

        try:
            data = self._download_json(
                '%s/zapi/v2/account/login' % self._host_url(), None, 'Logging in',
                data=urlencode_postdata({
                    'login': username,
                    'password': password,
                    'remember': 'true',
                }), headers={
                    'Referer': '%s/login' % self._host_url(),
                    'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
                })
        except ExtractorError as e:
            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
                raise ExtractorError(
                    'Unable to login: incorrect username and/or password',
                    expected=True)
            raise

        self._power_guide_hash = data['session']['power_guide_hash']

    def _real_initialize(self):
        """Open a session on the site and log in.

        The app token (mandatory) and the app version (falls back to
        '2.8.2' when it cannot be found) are scraped from the site's
        landing page and sent to the session "hello" endpoint.
        """
        webpage = self._download_webpage(
            self._host_url(), None, 'Downloading app token')
        app_token = self._html_search_regex(
            r'appToken\s*=\s*(["\'])(?P<token>(?:(?!\1).)+?)\1',
            webpage, 'app token', group='token')
        app_version = self._html_search_regex(
            r'<!--\w+-(.+?)-', webpage, 'app version', default='2.8.2')

        # Will setup appropriate cookies
        self._request_webpage(
            '%s/zapi/v2/session/hello' % self._host_url(), None,
            'Opening session', data=urlencode_postdata({
                'client_app_token': app_token,
                'uuid': compat_str(uuid4()),
                'lang': 'en',
                'app_version': app_version,
                'format': 'json',
            }))

        self._login()

    def _extract_cid(self, video_id, channel_name):
        """Return the cid of the channel identified by channel_name.

        The channel list is downloaded and every channel of every group is
        considered; a channel matches when either its 'display_alias' or
        its 'cid' equals channel_name. Raises ExtractorError when no
        channel matches.
        """
        channel_groups = self._download_json(
            '%s/zapi/v2/cached/channels/%s' % (self._host_url(),
                                               self._power_guide_hash),
            video_id, 'Downloading channel list',
            query={'details': False})['channel_groups']
        channel_list = []
        for chgrp in channel_groups:
            channel_list.extend(chgrp['channels'])
        try:
            return next(
                chan['cid'] for chan in channel_list
                if chan.get('cid') and (
                    chan.get('display_alias') == channel_name or
                    chan.get('cid') == channel_name))
        except StopIteration:
            raise ExtractorError('Could not extract channel id')

    def _extract_cid_and_video_info(self, video_id):
        """Return a (cid, info_dict) tuple for the program video_id.

        The info dict is built from the first entry of the 'programs' list
        in the program details response. Raises ExtractorError when that
        list is missing or empty.
        """
        data = self._download_json(
            '%s/zapi/v2/cached/program/power_details/%s' % (
                self._host_url(), self._power_guide_hash),
            video_id,
            'Downloading video information',
            query={
                'program_ids': video_id,
                'complete': True,
            })

        # Fail with a meaningful message instead of a bare IndexError or
        # KeyError when the response carries no program at all.
        programs = try_get(data, lambda x: x['programs'], list)
        if not programs:
            raise ExtractorError(
                'Unable to extract program information for %s' % video_id)

        p = programs[0]
        cid = p['cid']

        info_dict = {
            'id': video_id,
            # 't' is preferred as title; 'et' is the mandatory fallback
            'title': p.get('t') or p['et'],
            'description': p.get('d'),
            'thumbnail': p.get('i_url'),
            'creator': p.get('channel_name'),
            'episode': p.get('et'),
            'episode_number': int_or_none(p.get('e_no')),
            'season_number': int_or_none(p.get('s_no')),
            'release_year': int_or_none(p.get('year')),
            'categories': try_get(p, lambda x: x['c'], list),
            'tags': try_get(p, lambda x: x['g'], list),
        }

        return cid, info_dict

    def _extract_formats(self, cid, video_id, record_id=None, is_live=False):
        """Collect and sort the formats of every requested stream type.

        The watch endpoint depends on the arguments: the live endpoint of
        cid when is_live is set (a 'timeshift' value is then added to the
        request), else the recording endpoint when record_id is given,
        else the recall endpoint of cid/video_id.

        Each stream type is requested non-fatally; stream types without a
        usable 'watch_urls' list are skipped. Formats coming from a watch
        URL whose 'audio_channel' is 'A' get preference 1, all others get
        preference None.
        """
        postdata_common = {
            'https_watch_urls': True,
        }

        if is_live:
            postdata_common.update({'timeshift': 10800})
            url = '%s/zapi/watch/live/%s' % (self._host_url(), cid)
        elif record_id:
            url = '%s/zapi/watch/recording/%s' % (self._host_url(), record_id)
        else:
            url = '%s/zapi/watch/recall/%s/%s' % (self._host_url(), cid, video_id)

        formats = []
        for stream_type in ('dash', 'hls', 'hls5', 'hds'):
            postdata = postdata_common.copy()
            postdata['stream_type'] = stream_type

            data = self._download_json(
                url, video_id, 'Downloading %s formats' % stream_type.upper(),
                data=urlencode_postdata(postdata), fatal=False)
            if not data:
                continue

            watch_urls = try_get(
                data, lambda x: x['stream']['watch_urls'], list)
            if not watch_urls:
                continue

            for watch in watch_urls:
                if not isinstance(watch, dict):
                    continue
                watch_url = url_or_none(watch.get('url'))
                if not watch_url:
                    continue
                # Format id: stream type, then max rate and audio channel
                # when the watch URL entry provides them.
                format_id_list = [stream_type]
                maxrate = watch.get('maxrate')
                if maxrate:
                    format_id_list.append(compat_str(maxrate))
                audio_channel = watch.get('audio_channel')
                if audio_channel:
                    format_id_list.append(compat_str(audio_channel))
                preference = 1 if audio_channel == 'A' else None
                format_id = '-'.join(format_id_list)
                if stream_type in ('dash', 'dash_widevine', 'dash_playready'):
                    this_formats = self._extract_mpd_formats(
                        watch_url, video_id, mpd_id=format_id, fatal=False)
                elif stream_type in ('hls', 'hls5', 'hls5_fairplay'):
                    this_formats = self._extract_m3u8_formats(
                        watch_url, video_id, 'mp4',
                        entry_protocol='m3u8_native', m3u8_id=format_id,
                        fatal=False)
                elif stream_type == 'hds':
                    this_formats = self._extract_f4m_formats(
                        watch_url, video_id, f4m_id=format_id, fatal=False)
                elif stream_type == 'smooth_playready':
                    this_formats = self._extract_ism_formats(
                        watch_url, video_id, ism_id=format_id, fatal=False)
                else:
                    # Every requested stream type needs a branch above. Fail
                    # explicitly rather than with an assert, which is
                    # removed when Python runs with -O and would leave
                    # this_formats unbound or stale.
                    raise ExtractorError(
                        'Unsupported stream type: %s' % stream_type)
                for this_format in this_formats:
                    this_format['preference'] = preference
                formats.extend(this_formats)
        self._sort_formats(formats)
        return formats

    def _extract_video(self, channel_name, video_id, record_id=None, is_live=False):
        """Build the info dict for a live channel or a single program.

        For live streams the cid is looked up by channel_name and the info
        dict uses channel_name as id. Otherwise both the cid and the
        metadata come from the program details of video_id.
        """
        if is_live:
            cid = self._extract_cid(video_id, channel_name)
            info_dict = {
                'id': channel_name,
                'title': self._live_title(channel_name),
                'is_live': True,
            }
        else:
            cid, info_dict = self._extract_cid_and_video_info(video_id)
        formats = self._extract_formats(
            cid, video_id, record_id=record_id, is_live=is_live)
        info_dict['formats'] = formats
        return info_dict
204
205
class QuicklineBaseIE(ZattooPlatformBaseIE):
    # Host and credentials entry shared by the Quickline extractors.
    _HOST = 'mobiltv.quickline.com'
    _NETRC_MACHINE = 'quickline'
209
210
class QuicklineIE(QuicklineBaseIE):
    # Program pages: /watch/<channel>/<numeric program id>...
    _VALID_URL = r'https?://(?:www\.)?%s/watch/(?P<channel>[^/]+)/(?P<id>[0-9]+)' % re.escape(QuicklineBaseIE._HOST)

    _TEST = {
        'url': 'https://mobiltv.quickline.com/watch/prosieben/130671867-maze-runner-die-auserwaehlten-in-der-brandwueste',
        'only_matching': True,
    }

    def _real_extract(self, url):
        # Both values come straight from the named groups of _VALID_URL.
        mobj = re.match(self._VALID_URL, url)
        channel_name, video_id = mobj.group('channel', 'id')
        return self._extract_video(channel_name, video_id)
222
223
class QuicklineLiveIE(QuicklineBaseIE):
    # Live channel pages: /watch/<channel>
    _VALID_URL = r'https?://(?:www\.)?%s/watch/(?P<id>[^/]+)' % re.escape(QuicklineBaseIE._HOST)

    _TEST = {
        'url': 'https://mobiltv.quickline.com/watch/srf1',
        'only_matching': True,
    }

    @classmethod
    def suitable(cls, url):
        # Program URLs also match the pattern above, so anything that
        # QuicklineIE accepts is rejected here.
        if QuicklineIE.suitable(url):
            return False
        return super(QuicklineLiveIE, cls).suitable(url)

    def _real_extract(self, url):
        # The matched channel name doubles as the video id for live streams.
        channel_name = self._match_id(url)
        return self._extract_video(channel_name, channel_name, is_live=True)
239
240
class ZattooBaseIE(ZattooPlatformBaseIE):
    # Host and credentials entry shared by the zattoo.com extractors.
    _HOST = 'zattoo.com'
    _NETRC_MACHINE = 'zattoo'
244
245
246 def _make_valid_url(tmpl, host):
247     return tmpl % re.escape(host)
248
249
class ZattooIE(ZattooBaseIE):
    # The %s placeholder receives the regex-escaped host of the site.
    _VALID_URL_TEMPLATE = r'https?://(?:www\.)?%s/watch/(?P<channel>[^/]+?)/(?P<id>[0-9]+)[^/]+(?:/(?P<recid>[0-9]+))?'
    _VALID_URL = _make_valid_url(_VALID_URL_TEMPLATE, ZattooBaseIE._HOST)

    # Detailed tests are not possible: regular videos stay available for
    # 7 days only and recorded videos are tied to a specific user.
    _TESTS = [
        {
            'url': 'https://zattoo.com/watch/prosieben/130671867-maze-runner-die-auserwaehlten-in-der-brandwueste',
            'only_matching': True,
        },
        {
            'url': 'https://zattoo.com/watch/srf_zwei/132905652-eishockey-spengler-cup/102791477/1512211800000/1514433500000/92000',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        # Groups in pattern order: channel, program id, optional recording id.
        mobj = re.match(self._VALID_URL, url)
        channel_name, video_id, record_id = mobj.groups()
        return self._extract_video(channel_name, video_id, record_id=record_id)
267
268
class ZattooLiveIE(ZattooBaseIE):
    # Live channel pages: /watch/<channel>
    _VALID_URL = r'https?://(?:www\.)?zattoo\.com/watch/(?P<id>[^/]+)'

    _TEST = {
        'url': 'https://zattoo.com/watch/srf1',
        'only_matching': True,
    }

    @classmethod
    def suitable(cls, url):
        # Program URLs also match the pattern above, so anything that
        # ZattooIE accepts is rejected here.
        if ZattooIE.suitable(url):
            return False
        return super(ZattooLiveIE, cls).suitable(url)

    def _real_extract(self, url):
        # The matched channel name doubles as the video id for live streams.
        channel_name = self._match_id(url)
        return self._extract_video(channel_name, channel_name, is_live=True)
284
285
class NetPlusIE(ZattooIE):
    # Site-specific settings for netplus.tv; extraction is inherited from
    # ZattooIE.
    _HOST = 'netplus.tv'
    _NETRC_MACHINE = 'netplus'
    _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)

    _TESTS = [
        {
            'url': 'https://www.netplus.tv/watch/abc/123-abc',
            'only_matching': True,
        },
    ]
295
296
class MNetTVIE(ZattooIE):
    # Site-specific settings for tvplus.m-net.de; extraction is inherited
    # from ZattooIE.
    _HOST = 'tvplus.m-net.de'
    _NETRC_MACHINE = 'mnettv'
    _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)

    _TESTS = [
        {
            'url': 'https://www.tvplus.m-net.de/watch/abc/123-abc',
            'only_matching': True,
        },
    ]
306
307
class WalyTVIE(ZattooIE):
    # Site-specific settings for player.waly.tv; extraction is inherited
    # from ZattooIE.
    _HOST = 'player.waly.tv'
    _NETRC_MACHINE = 'walytv'
    _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)

    _TESTS = [
        {
            'url': 'https://www.player.waly.tv/watch/abc/123-abc',
            'only_matching': True,
        },
    ]
317
318
class BBVTVIE(ZattooIE):
    # Site-specific settings for bbv-tv.net; extraction is inherited from
    # ZattooIE.
    _HOST = 'bbv-tv.net'
    _NETRC_MACHINE = 'bbvtv'
    _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)

    _TESTS = [
        {
            'url': 'https://www.bbv-tv.net/watch/abc/123-abc',
            'only_matching': True,
        },
    ]
328
329
class VTXTVIE(ZattooIE):
    # Site-specific settings for vtxtv.ch; extraction is inherited from
    # ZattooIE.
    _HOST = 'vtxtv.ch'
    _NETRC_MACHINE = 'vtxtv'
    _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)

    _TESTS = [
        {
            'url': 'https://www.vtxtv.ch/watch/abc/123-abc',
            'only_matching': True,
        },
    ]
339
340
class MyVisionTVIE(ZattooIE):
    # Site-specific settings for myvisiontv.ch; extraction is inherited
    # from ZattooIE.
    _HOST = 'myvisiontv.ch'
    _NETRC_MACHINE = 'myvisiontv'
    _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)

    _TESTS = [
        {
            'url': 'https://www.myvisiontv.ch/watch/abc/123-abc',
            'only_matching': True,
        },
    ]
350
351
class GlattvisionTVIE(ZattooIE):
    # Site-specific settings for iptv.glattvision.ch; extraction is
    # inherited from ZattooIE.
    _HOST = 'iptv.glattvision.ch'
    _NETRC_MACHINE = 'glattvisiontv'
    _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)

    _TESTS = [
        {
            'url': 'https://www.iptv.glattvision.ch/watch/abc/123-abc',
            'only_matching': True,
        },
    ]
361
362
class SAKTVIE(ZattooIE):
    # Site-specific settings for saktv.ch; extraction is inherited from
    # ZattooIE.
    _HOST = 'saktv.ch'
    _NETRC_MACHINE = 'saktv'
    _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)

    _TESTS = [
        {
            'url': 'https://www.saktv.ch/watch/abc/123-abc',
            'only_matching': True,
        },
    ]
372
373
class EWETVIE(ZattooIE):
    # Site-specific settings for tvonline.ewe.de; extraction is inherited
    # from ZattooIE.
    _HOST = 'tvonline.ewe.de'
    _NETRC_MACHINE = 'ewetv'
    _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)

    _TESTS = [
        {
            'url': 'https://www.tvonline.ewe.de/watch/abc/123-abc',
            'only_matching': True,
        },
    ]
383
384
class QuantumTVIE(ZattooIE):
    # Site-specific settings for quantum-tv.com; extraction is inherited
    # from ZattooIE.
    _HOST = 'quantum-tv.com'
    _NETRC_MACHINE = 'quantumtv'
    _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)

    _TESTS = [
        {
            'url': 'https://www.quantum-tv.com/watch/abc/123-abc',
            'only_matching': True,
        },
    ]
394
395
class OsnatelTVIE(ZattooIE):
    # Site-specific settings for onlinetv.osnatel.de; extraction is
    # inherited from ZattooIE.
    _HOST = 'onlinetv.osnatel.de'
    _NETRC_MACHINE = 'osnateltv'
    _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)

    _TESTS = [
        {
            'url': 'https://www.onlinetv.osnatel.de/watch/abc/123-abc',
            'only_matching': True,
        },
    ]
405
406
class EinsUndEinsTVIE(ZattooIE):
    # Site-specific settings for 1und1.tv; extraction is inherited from
    # ZattooIE.
    _HOST = '1und1.tv'
    _NETRC_MACHINE = '1und1tv'
    _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)

    _TESTS = [
        {
            'url': 'https://www.1und1.tv/watch/abc/123-abc',
            'only_matching': True,
        },
    ]