_ Git - youtube-dl/blob - youtube_dl/extractor/zattoo.py

   1 # coding: utf-8
   2 from __future__ import unicode_literals
   3
   4 from uuid import uuid4
   5 import re
   6
   7 from .common import InfoExtractor
   8 from ..utils import (
   9     compat_str,
  10     ExtractorError,
  11     sanitized_Request,
  12     urlencode_postdata,
  13 )
  14
  15
  16 class ZattooBaseIE(InfoExtractor):
  17
  18     _NETRC_MACHINE = 'zattoo'
  19     _HOST_URL = 'https://zattoo.com'
  20
  21     _power_guide_hash = None
  22
  23     def _login(self, uuid, session_id):
  24         (username, password) = self._get_login_info()
  25         if not username or not password:
  26             raise ExtractorError(
  27                 'A valid %s account is needed to access this media.' % self._NETRC_MACHINE,
  28                 expected=True)
  29         login_form = {
  30             'login': username,
  31             'password': password,
  32             'remember': True,
  33         }
  34         request = sanitized_Request(
  35             '%s/zapi/v2/account/login' % self._HOST_URL,
  36             urlencode_postdata(login_form))
  37         request.add_header(
  38             'Referer', '%s/login' % self._HOST_URL)
  39         request.add_header(
  40             'Content-Type', 'application/x-www-form-urlencoded; charset=UTF-8')
  41         request.add_header(
  42             'Cookie', 'uuid=%s; beaker.session.id=%s' % (uuid, session_id))
  43         response = self._request_webpage(
  44             request, None, 'Logging in')
  45         data = self._parse_json(response.read(), None)
  46         return data['session']['power_guide_hash']
  47
  48     def _get_app_token_and_version(self):
  49         host_webpage = self._download_webpage(
  50             self._HOST_URL, None, 'Downloading %s' % self._HOST_URL)
  51         app_token = self._html_search_regex(
  52             r'<script.+window\.appToken\s*=\s*\'(.+)\'', host_webpage, 'app token')
  53         app_version = self._html_search_regex(
  54             r'<!--\w+-(.+?)-', host_webpage, 'app version', default='2.8.2')
  55         return app_token, app_version
  56
  57     def _say_hello(self, uuid, app_token, app_version):
  58         postdata = {
  59             'client_app_token': app_token,
  60             'uuid': uuid,
  61             'lang': 'en',
  62             'app_version': app_version,
  63             'format': 'json',
  64         }
  65         request = sanitized_Request(
  66             '%s/zapi/v2/session/hello' % self._HOST_URL,
  67             urlencode_postdata(postdata))
  68         response = self._request_webpage(
  69             request, None, 'Say hello')
  70
  71         cookie = response.headers.get('Set-Cookie')
  72         session_id = self._search_regex(
  73             r'beaker\.session\.id\s*=\s*(.+?);', cookie, 'session id')
  74         return session_id
  75
  76     def _extract_cid(self, video_id, channel_name):
  77         channel_groups = self._download_json(
  78             '%s/zapi/v2/cached/channels/%s' % (self._HOST_URL,
  79                                                self._power_guide_hash),
  80             video_id,
  81             'Downloading available channel list',
  82             query={'details': False})['channel_groups']
  83         channel_list = []
  84         for chgrp in channel_groups:
  85             channel_list.extend(chgrp['channels'])
  86         try:
  87             return next(
  88                 chan['cid'] for chan in channel_list
  89                 if chan['display_alias'] == channel_name or chan['cid'] == channel_name)
  90         except StopIteration:
  91             raise ExtractorError('Could not extract channel id')
  92
  93     def _extract_cid_and_video_info(self, video_id):
  94         data = self._download_json(
  95             '%s/zapi/program/details' % self._HOST_URL,
  96             video_id,
  97             'Downloading video information',
  98             query={
  99                 'program_id': video_id,
 100                 'complete': True
 101             })
 102
 103         info_dict = {
 104             'id': video_id,
 105             'title': data['program']['title'],
 106             'description': data['program'].get('description'),
 107             'thumbnail': data['program'].get('image_url')
 108         }
 109         cid = data['program']['cid']
 110         return cid, info_dict
 111
 112     def _extract_formats(self, cid, video_id, record_id=None, is_live=False):
 113         postdata = {
 114             'stream_type': 'dash',
 115             'https_watch_urls': True,
 116         }
 117         if record_id:
 118             url = '%s/zapi/watch/recording/%s' % (self._HOST_URL, record_id)
 119         else:
 120             url = '%s/zapi/watch/recall/%s/%s' % (self._HOST_URL, cid, video_id)
 121
 122         if is_live:
 123             postdata.update({'timeshift': 10800})
 124             url = '%s/zapi/watch/live/%s' % (self._HOST_URL, cid)
 125
 126         data = self._download_json(
 127             sanitized_Request(url, urlencode_postdata(postdata)),
 128             video_id, 'Downloading dash formats')
 129
 130         formats = []
 131         for elem in data['stream']['watch_urls']:
 132             audio_channel = elem.get('audio_channel')
 133             maxrate = elem.get('maxrate')
 134             formats.extend(
 135                 self._extract_mpd_formats(
 136                     elem['url'], video_id,
 137                     mpd_id='dash-maxrate-%s-channel-%s' % (maxrate, audio_channel), fatal=False))
 138
 139         postdata.update({'stream_type': 'hls'})
 140         request = sanitized_Request(
 141             url, urlencode_postdata(postdata))
 142         data = self._download_json(
 143             request, video_id, 'Downloading hls formats')
 144         for elem in data['stream']['watch_urls']:
 145             audio_channel = elem.get('audio_channel')
 146             preference = None
 147
 148             # Prefer audio channel A:
 149             if audio_channel == 'A':
 150                 preference = 1
 151
 152             maxrate = elem.get('maxrate')
 153             formats.extend(
 154                 self._extract_m3u8_formats(
 155                     elem['url'], video_id, 'mp4', entry_protocol='m3u8_native',
 156                     preference=preference,
 157                     m3u8_id='hls-maxrate-%s-channel-%s' % (maxrate, audio_channel),
 158                     fatal=False))
 159
 160         self._sort_formats(formats)
 161         return formats
 162
 163     def _real_initialize(self):
 164         uuid = compat_str(uuid4())
 165         app_token, app_version = self._get_app_token_and_version()
 166         session_id = self._say_hello(uuid, app_token, app_version)
 167         self._power_guide_hash = self._login(uuid, session_id)
 168
 169     def _extract_video(self, channel_name, video_id, record_id=None, is_live=False):
 170         if is_live:
 171             cid = self._extract_cid(video_id, channel_name)
 172             info_dict = {
 173                 'id': channel_name,
 174                 'title': self._live_title(channel_name),
 175                 'is_live': True,
 176             }
 177         else:
 178             cid, info_dict = self._extract_cid_and_video_info(video_id)
 179         formats = self._extract_formats(
 180             cid, video_id, record_id=record_id, is_live=is_live)
 181         info_dict['formats'] = formats
 182         return info_dict
 183
 184
 185 class QuicklineBaseIE(ZattooBaseIE):
 186     _NETRC_MACHINE = 'quickline'
 187     _HOST_URL = 'https://mobiltv.quickline.com'
 188
 189
 190 class QuicklineIE(QuicklineBaseIE):
 191     _VALID_URL = r'https?://(?:www\.)?mobiltv\.quickline\.com/watch/(?P<channel>[^/]+)/(?P<id>[0-9]+)'
 192
 193     def _real_extract(self, url):
 194         channel_name, video_id = re.match(self._VALID_URL, url).groups()
 195         return self._extract_video(channel_name, video_id)
 196
 197
 198 class QuicklineLiveIE(QuicklineBaseIE):
 199     _VALID_URL = r'https?://(?:www\.)?mobiltv\.quickline\.com/watch/(?P<id>[^/]+)$'
 200
 201     def _real_extract(self, url):
 202         channel_name = video_id = self._match_id(url)
 203         return self._extract_video(channel_name, video_id, is_live=True)
 204
 205
 206 class ZattooIE(ZattooBaseIE):
 207     _VALID_URL = r'https?://(?:www\.)?zattoo\.com/watch/(?P<channel>[^/]+?)/(?P<id>[0-9]+)[^/]+(?:/(?P<recid>[0-9]+))?'
 208
 209     # Since regular videos are only available for 7 days and recorded videos
 210     # are only available for a specific user, we cannot have detailed tests.
 211     _TESTS = [{
 212         'url': 'https://zattoo.com/watch/prosieben/130671867-maze-runner-die-auserwaehlten-in-der-brandwueste',
 213         'only_matching': True,
 214     }, {
 215         'url': 'https://zattoo.com/watch/srf_zwei/132905652-eishockey-spengler-cup/102791477/1512211800000/1514433500000/92000',
 216         'only_matching': True,
 217     }]
 218
 219     def _real_extract(self, url):
 220         channel_name, video_id, record_id = re.match(self._VALID_URL, url).groups()
 221         return self._extract_video(channel_name, video_id, record_id)
 222
 223
 224 class ZattooLiveIE(ZattooBaseIE):
 225     _VALID_URL = r'https?://(?:www\.)?zattoo\.com/watch/(?P<id>[^/]+)$'
 226
 227     _TEST = {
 228         'url': 'https://zattoo.com/watch/srf1',
 229         'only_matching': True,
 230     }
 231
 232     def _real_extract(self, url):
 233         channel_name = video_id = self._match_id(url)
 234         return self._extract_video(channel_name, video_id, is_live=True)