[zattoo] Add extractor (closes #14668)
[youtube-dl] / youtube_dl / extractor / zattoo.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 from uuid import uuid4
5 import re
6
7 from .common import InfoExtractor
8 from ..utils import (
9     compat_str,
10     ExtractorError,
11     sanitized_Request,
12     urlencode_postdata,
13 )
14
15
16 class ZattooBaseIE(InfoExtractor):
17
18     _NETRC_MACHINE = 'zattoo'
19     _HOST_URL = 'https://zattoo.com'
20
21     _power_guide_hash = None
22
23     def _login(self, uuid, session_id):
24         (username, password) = self._get_login_info()
25         if not username or not password:
26             raise ExtractorError(
27                 'A valid %s account is needed to access this media.' % self._NETRC_MACHINE,
28                 expected=True)
29         login_form = {
30             'login': username,
31             'password': password,
32             'remember': True,
33         }
34         request = sanitized_Request(
35             '%s/zapi/v2/account/login' % self._HOST_URL,
36             urlencode_postdata(login_form))
37         request.add_header(
38             'Referer', '%s/login' % self._HOST_URL)
39         request.add_header(
40             'Content-Type', 'application/x-www-form-urlencoded; charset=UTF-8')
41         request.add_header(
42             'Cookie', 'uuid=%s; beaker.session.id=%s' % (uuid, session_id))
43         response = self._request_webpage(
44             request, None, 'Logging in')
45         data = self._parse_json(response.read(), None)
46         return data['session']['power_guide_hash']
47
48     def _get_app_token_and_version(self):
49         host_webpage = self._download_webpage(
50             self._HOST_URL, None, 'Downloading %s' % self._HOST_URL)
51         app_token = self._html_search_regex(
52             r'<script.+window\.appToken\s*=\s*\'(.+)\'', host_webpage, 'app token')
53         app_version = self._html_search_regex(
54             r'<!--\w+-(.+?)-', host_webpage, 'app version', default='2.8.2')
55         return app_token, app_version
56
57     def _say_hello(self, uuid, app_token, app_version):
58         postdata = {
59             'client_app_token': app_token,
60             'uuid': uuid,
61             'lang': 'en',
62             'app_version': app_version,
63             'format': 'json',
64         }
65         request = sanitized_Request(
66             '%s/zapi/v2/session/hello' % self._HOST_URL,
67             urlencode_postdata(postdata))
68         response = self._request_webpage(
69             request, None, 'Say hello')
70
71         cookie = response.headers.get('Set-Cookie')
72         session_id = self._search_regex(
73             r'beaker\.session\.id\s*=\s*(.+?);', cookie, 'session id')
74         return session_id
75
76     def _extract_cid(self, video_id, channel_name):
77         channel_groups = self._download_json(
78             '%s/zapi/v2/cached/channels/%s' % (self._HOST_URL,
79                                                self._power_guide_hash),
80             video_id,
81             'Downloading available channel list',
82             query={'details': False})['channel_groups']
83         channel_list = []
84         for chgrp in channel_groups:
85             channel_list.extend(chgrp['channels'])
86         try:
87             return next(
88                 chan['cid'] for chan in channel_list
89                 if chan['display_alias'] == channel_name or chan['cid'] == channel_name)
90         except StopIteration:
91             raise ExtractorError('Could not extract channel id')
92
93     def _extract_cid_and_video_info(self, video_id):
94         data = self._download_json(
95             '%s/zapi/program/details' % self._HOST_URL,
96             video_id,
97             'Downloading video information',
98             query={
99                 'program_id': video_id,
100                 'complete': True
101             })
102
103         info_dict = {
104             'id': video_id,
105             'title': data['program']['title'],
106             'description': data['program'].get('description'),
107             'thumbnail': data['program'].get('image_url')
108         }
109         cid = data['program']['cid']
110         return cid, info_dict
111
112     def _extract_formats(self, cid, video_id, record_id=None, is_live=False):
113         postdata = {
114             'stream_type': 'dash',
115             'https_watch_urls': True,
116         }
117         if record_id:
118             url = '%s/zapi/watch/recording/%s' % (self._HOST_URL, record_id)
119         else:
120             url = '%s/zapi/watch/recall/%s/%s' % (self._HOST_URL, cid, video_id)
121
122         if is_live:
123             postdata.update({'timeshift': 10800})
124             url = '%s/zapi/watch/live/%s' % (self._HOST_URL, cid)
125
126         data = self._download_json(
127             sanitized_Request(url, urlencode_postdata(postdata)),
128             video_id, 'Downloading dash formats')
129
130         formats = []
131         for elem in data['stream']['watch_urls']:
132             audio_channel = elem.get('audio_channel')
133             maxrate = elem.get('maxrate')
134             formats.extend(
135                 self._extract_mpd_formats(
136                     elem['url'], video_id,
137                     mpd_id='dash-maxrate-%s-channel-%s' % (maxrate, audio_channel), fatal=False))
138
139         postdata.update({'stream_type': 'hls'})
140         request = sanitized_Request(
141             url, urlencode_postdata(postdata))
142         data = self._download_json(
143             request, video_id, 'Downloading hls formats')
144         for elem in data['stream']['watch_urls']:
145             audio_channel = elem.get('audio_channel')
146             preference = None
147
148             # Prefer audio channel A:
149             if audio_channel == 'A':
150                 preference = 1
151
152             maxrate = elem.get('maxrate')
153             formats.extend(
154                 self._extract_m3u8_formats(
155                     elem['url'], video_id, 'mp4', entry_protocol='m3u8_native',
156                     preference=preference,
157                     m3u8_id='hls-maxrate-%s-channel-%s' % (maxrate, audio_channel),
158                     fatal=False))
159
160         self._sort_formats(formats)
161         return formats
162
163     def _real_initialize(self):
164         uuid = compat_str(uuid4())
165         app_token, app_version = self._get_app_token_and_version()
166         session_id = self._say_hello(uuid, app_token, app_version)
167         self._power_guide_hash = self._login(uuid, session_id)
168
169     def _extract_video(self, channel_name, video_id, record_id=None, is_live=False):
170         if is_live:
171             cid = self._extract_cid(video_id, channel_name)
172             info_dict = {
173                 'id': channel_name,
174                 'title': self._live_title(channel_name),
175                 'is_live': True,
176             }
177         else:
178             cid, info_dict = self._extract_cid_and_video_info(video_id)
179         formats = self._extract_formats(
180             cid, video_id, record_id=record_id, is_live=is_live)
181         info_dict['formats'] = formats
182         return info_dict
183
184
class QuicklineBaseIE(ZattooBaseIE):
    """Base class of the Quickline extractors.

    Inherits all behaviour from ZattooBaseIE and only overrides the two
    constants below.
    """
    # Name interpolated into the "A valid %s account is needed" login error
    # of the inherited _login (presumably also the netrc machine looked up
    # by _get_login_info -- confirm against InfoExtractor).
    _NETRC_MACHINE = 'quickline'
    # Base URL the inherited helpers prepend to every /zapi/... endpoint.
    _HOST_URL = 'https://mobiltv.quickline.com'
188
189
class QuicklineIE(QuicklineBaseIE):
    """Extractor for Quickline URLs of the form /watch/<channel>/<id>."""
    _VALID_URL = r'https?://(?:www\.)?mobiltv\.quickline\.com/watch/(?P<channel>[^/]+)/(?P<id>[0-9]+)'

    def _real_extract(self, url):
        """Extract the program addressed by the channel and id in the URL."""
        mobj = re.match(self._VALID_URL, url)
        return self._extract_video(mobj.group('channel'), mobj.group('id'))
196
197
class QuicklineLiveIE(QuicklineBaseIE):
    """Extractor for Quickline live URLs of the form /watch/<channel>."""
    _VALID_URL = r'https?://(?:www\.)?mobiltv\.quickline\.com/watch/(?P<id>[^/]+)$'

    def _real_extract(self, url):
        """Extract the live stream of the channel named in the URL."""
        # For live streams the channel name doubles as the video id.
        channel = self._match_id(url)
        return self._extract_video(channel, channel, is_live=True)
204
205
class ZattooIE(ZattooBaseIE):
    """Extractor for Zattoo program URLs.

    Matches /watch/<channel>/<id>[slug][/<recording id>...].
    """
    # The slug after the numeric id is optional (`[^/]*`). Requiring at least
    # one character there made the regex backtrack on slug-less URLs and cut
    # the last digit off the id.
    _VALID_URL = r'https?://(?:www\.)?zattoo\.com/watch/(?P<channel>[^/]+?)/(?P<id>[0-9]+)[^/]*(?:/(?P<recid>[0-9]+))?'

    # Since regular videos are only available for 7 days and recorded videos
    # are only available for a specific user, we cannot have detailed tests.
    _TESTS = [{
        'url': 'https://zattoo.com/watch/prosieben/130671867-maze-runner-die-auserwaehlten-in-der-brandwueste',
        'only_matching': True,
    }, {
        'url': 'https://zattoo.com/watch/srf_zwei/132905652-eishockey-spengler-cup/102791477/1512211800000/1514433500000/92000',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Extract the program (or recording, when a recording id is present)."""
        mobj = re.match(self._VALID_URL, url)
        # Read the groups by name so the call does not depend on their order
        # inside _VALID_URL; recid is None when the URL has no recording id.
        return self._extract_video(
            mobj.group('channel'), mobj.group('id'), mobj.group('recid'))
222
223
class ZattooLiveIE(ZattooBaseIE):
    """Extractor for Zattoo live URLs of the form /watch/<channel>."""
    _VALID_URL = r'https?://(?:www\.)?zattoo\.com/watch/(?P<id>[^/]+)$'

    _TEST = {
        'url': 'https://zattoo.com/watch/srf1',
        'only_matching': True,
    }

    def _real_extract(self, url):
        """Extract the live stream of the channel named in the URL."""
        # For live streams the channel name doubles as the video id.
        channel = self._match_id(url)
        return self._extract_video(channel, channel, is_live=True)