[youtube] Add ability to authenticate with cookies
[youtube-dl] / youtube_dl / extractor / tvnow.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import re
5
6 from .common import InfoExtractor
7 from ..compat import compat_str
8 from ..utils import (
9     ExtractorError,
10     int_or_none,
11     parse_iso8601,
12     parse_duration,
13     try_get,
14     update_url_query,
15 )
16
17
class TVNowBaseIE(InfoExtractor):
    """Shared helpers for the TVNOW extractors: API access and video parsing."""

    # Fields requested from the API for a single video object.
    _VIDEO_FIELDS = (
        'id', 'title', 'free', 'geoblocked', 'articleLong', 'articleShort',
        'broadcastStartDate', 'isDrm', 'duration', 'season', 'episode',
        'manifest.dashclear', 'format.title', 'format.defaultImage169Format',
        'format.defaultImage169Logo')

    def _call_api(self, path, video_id, query):
        """Download and return the JSON found at `path` under the v3 API root.

        `video_id` and `query` are forwarded to `_download_json`.
        """
        return self._download_json(
            'https://api.tvnow.de/v3/' + path,
            video_id, query=query)

    def _extract_video(self, info, display_id):
        """Build the result dict for one video from its API object.

        `info` must contain 'id' and 'title'; every other key is optional.
        `display_id` is copied into the result unchanged.

        Raises ExtractorError when `info` carries no DASH manifest URL. The
        message names DRM, geo restriction or paid-only content when the
        corresponding flag is set, and is generic otherwise.
        """
        video_id = compat_str(info['id'])
        title = info['title']

        # A missing or null 'manifest' object is treated like an empty URL so
        # that the descriptive errors below are reached.
        mpd_url = try_get(
            info, lambda x: x['manifest']['dashclear'], compat_str)
        if not mpd_url:
            if info.get('isDrm'):
                raise ExtractorError(
                    'Video %s is DRM protected' % video_id, expected=True)
            if info.get('geoblocked'):
                raise ExtractorError(
                    'Video %s is not available from your location due to geo restriction' % video_id,
                    expected=True)
            if not info.get('free', True):
                raise ExtractorError(
                    'Video %s is not available for free' % video_id, expected=True)
            # No known reason applies; stop here rather than handing an empty
            # URL to update_url_query() below.
            raise ExtractorError(
                'Unable to find manifest URL for video %s' % video_id)

        mpd_url = update_url_query(mpd_url, {'filter': ''})
        # The Smooth Streaming and HLS manifest URLs are derived from the DASH
        # one by string substitution. Each lookup is non-fatal.
        formats = self._extract_mpd_formats(mpd_url, video_id, mpd_id='dash', fatal=False)
        formats.extend(self._extract_ism_formats(
            mpd_url.replace('dash.', 'hss.').replace('/.mpd', '/Manifest'),
            video_id, ism_id='mss', fatal=False))
        formats.extend(self._extract_m3u8_formats(
            mpd_url.replace('dash.', 'hls.').replace('/.mpd', '/.m3u8'),
            video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
        self._sort_formats(formats)

        description = info.get('articleLong') or info.get('articleShort')
        timestamp = parse_iso8601(info.get('broadcastStartDate'), ' ')
        duration = parse_duration(info.get('duration'))

        # `or {}` also covers an explicit null value, which a plain
        # .get('format', {}) would pass through as None.
        f = info.get('format') or {}

        # The first thumbnail URL is built from the video id; the image
        # advertised by the show object, if any, is added after it.
        thumbnails = [{
            'url': 'https://aistvnow-a.akamaihd.net/tvnow/movie/%s' % video_id,
        }]
        thumbnail = f.get('defaultImage169Format') or f.get('defaultImage169Logo')
        if thumbnail:
            thumbnails.append({
                'url': thumbnail,
            })

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'description': description,
            'thumbnails': thumbnails,
            'timestamp': timestamp,
            'duration': duration,
            'series': f.get('title'),
            'season_number': int_or_none(info.get('season')),
            'episode_number': int_or_none(info.get('episode')),
            'episode': title,
            'formats': formats,
        }
86
87
class TVNowIE(TVNowBaseIE):
    """Extractor for a single TVNOW video page."""

    # Matches <channel>/<show>/<episode>. The look-ahead rejects "list" and
    # "jahr" as the episode segment.
    _VALID_URL = r'''(?x)
                    https?://
                        (?:www\.)?tvnow\.(?:de|at|ch)/[^/]+/
                        (?P<show_id>[^/]+)/
                        (?!(?:list|jahr)(?:/|$))(?P<id>[^/?\#&]+)
                    '''

    _TESTS = [{
        'url': 'https://www.tvnow.de/rtl2/grip-das-motormagazin/der-neue-porsche-911-gt-3/player',
        'info_dict': {
            'id': '331082',
            'display_id': 'grip-das-motormagazin/der-neue-porsche-911-gt-3',
            'ext': 'mp4',
            'title': 'Der neue Porsche 911 GT 3',
            'description': 'md5:6143220c661f9b0aae73b245e5d898bb',
            'thumbnail': r're:^https?://.*\.jpg$',
            'timestamp': 1495994400,
            'upload_date': '20170528',
            'duration': 5283,
            'series': 'GRIP - Das Motormagazin',
            'season_number': 14,
            'episode_number': 405,
            'episode': 'Der neue Porsche 911 GT 3',
        },
    }, {
        # rtl2
        'url': 'https://www.tvnow.de/rtl2/armes-deutschland/episode-0008/player',
        'only_matching': True,
    }, {
        # rtlnitro
        'url': 'https://www.tvnow.de/nitro/alarm-fuer-cobra-11-die-autobahnpolizei/auf-eigene-faust-pilot/player',
        'only_matching': True,
    }, {
        # superrtl
        'url': 'https://www.tvnow.de/superrtl/die-lustigsten-schlamassel-der-welt/u-a-ketchup-effekt/player',
        'only_matching': True,
    }, {
        # ntv
        'url': 'https://www.tvnow.de/ntv/startup-news/goetter-in-weiss/player',
        'only_matching': True,
    }, {
        # vox
        'url': 'https://www.tvnow.de/vox/auto-mobil/neues-vom-automobilmarkt-2017-11-19-17-00-00/player',
        'only_matching': True,
    }, {
        # rtlplus
        'url': 'https://www.tvnow.de/rtlplus/op-ruft-dr-bruckner/die-vernaehte-frau/player',
        'only_matching': True,
    }, {
        'url': 'https://www.tvnow.de/rtl2/grip-das-motormagazin/der-neue-porsche-911-gt-3',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Fetch the video object addressed by `url` and convert it."""
        mobj = re.match(self._VALID_URL, url)
        # The API path uses "<show>/<episode>", which also serves as the
        # display id.
        display_id = '%s/%s' % mobj.group('show_id', 'id')

        api_query = {
            'fields': ','.join(self._VIDEO_FIELDS),
        }
        video_info = self._call_api(
            'movies/' + display_id, display_id, query=api_query)

        return self._extract_video(video_info, display_id)
151
152
class TVNowListBaseIE(TVNowBaseIE):
    """Common base for the season-list and show extractors."""

    # URL prefix up to and including the show segment, captured as base_url.
    _SHOW_VALID_URL = r'''(?x)
                    (?P<base_url>
                        https?://
                            (?:www\.)?tvnow\.(?:de|at|ch)/[^/]+/
                            (?P<show_id>[^/]+)
                    )
                    '''

    def _extract_list_info(self, display_id, show_id):
        """Request the show object for `show_id` from the 'formats/seo' API.

        The requested field list is built from the `_SHOW_FIELDS`,
        `_SEASON_FIELDS` and `_VIDEO_FIELDS` attributes of the concrete class.
        """
        show_fields = list(self._SHOW_FIELDS)
        season_fields = [
            'formatTabs.%s' % name for name in self._SEASON_FIELDS]
        video_fields = [
            'formatTabs.formatTabPages.container.movies.%s' % name
            for name in self._VIDEO_FIELDS]

        api_query = {
            'fields': ','.join(show_fields + season_fields + video_fields),
            'name': show_id + '.php'
        }
        return self._call_api('formats/seo', display_id, query=api_query)
173
174
class TVNowListIE(TVNowListBaseIE):
    """Extractor for one season ("list" or "jahr") page of a TVNOW show."""

    _VALID_URL = r'%s/(?:list|jahr)/(?P<id>[^?\#&]+)' % TVNowListBaseIE._SHOW_VALID_URL

    _SHOW_FIELDS = ('title', )
    _SEASON_FIELDS = ('id', 'headline', 'seoheadline', )
    _VIDEO_FIELDS = ('id', 'headline', 'seoUrl', )

    _TESTS = [{
        'url': 'https://www.tvnow.de/rtl/30-minuten-deutschland/list/aktuell',
        'info_dict': {
            'id': '28296',
            'title': '30 Minuten Deutschland - Aktuell',
        },
        'playlist_mincount': 1,
    }, {
        'url': 'https://www.tvnow.de/vox/ab-ins-beet/list/staffel-14',
        'only_matching': True,
    }, {
        'url': 'https://www.tvnow.de/rtl2/grip-das-motormagazin/jahr/2018/3',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Decline any URL that TVNowIE accepts; otherwise use the default check."""
        if TVNowIE.suitable(url):
            return False
        return super(TVNowListIE, cls).suitable(url)

    def _real_extract(self, url):
        """Return a playlist of the videos listed under the season in `url`."""
        mobj = re.match(self._VALID_URL, url)
        base_url, show_id, season_id = mobj.group('base_url', 'show_id', 'id')

        list_info = self._extract_list_info(season_id, show_id)

        # Select the tab whose SEO headline equals the season part of the URL.
        season = next(
            tab for tab in list_info['formatTabs']['items']
            if tab.get('seoheadline') == season_id)

        # Playlist title is "<show> - <season headline>" when both are known,
        # otherwise whichever of the two is available.
        show_title = list_info.get('title')
        headline = season.get('headline')
        if show_title and headline:
            playlist_title = '%s - %s' % (show_title, headline)
        else:
            playlist_title = headline or show_title

        entries = []
        for page in season['formatTabPages']['items']:
            movies = try_get(
                page, lambda x: x['container']['movies']['items'],
                list) or []
            for movie in movies:
                seo_url = movie.get('seoUrl')
                if not seo_url:
                    # Without an SEO URL no player page address can be built.
                    continue
                movie_id = movie.get('id')
                entry_id = compat_str(movie_id) if movie_id else None
                entries.append(self.url_result(
                    '%s/%s/player' % (base_url, seo_url), TVNowIE.ie_key(),
                    entry_id))

        playlist_id = compat_str(season.get('id') or season_id)
        return self.playlist_result(entries, playlist_id, playlist_title)
234
235
class TVNowShowIE(TVNowListBaseIE):
    """Extractor for a TVNOW show page; yields one entry per season tab."""

    _VALID_URL = TVNowListBaseIE._SHOW_VALID_URL

    _SHOW_FIELDS = ('id', 'title', )
    _SEASON_FIELDS = ('id', 'headline', 'seoheadline', )
    _VIDEO_FIELDS = ()

    _TESTS = [{
        'url': 'https://www.tvnow.at/vox/ab-ins-beet',
        'info_dict': {
            'id': 'ab-ins-beet',
            'title': 'Ab ins Beet!',
        },
        'playlist_mincount': 7,
    }, {
        'url': 'https://www.tvnow.at/vox/ab-ins-beet/list',
        'only_matching': True,
    }, {
        'url': 'https://www.tvnow.de/rtl2/grip-das-motormagazin/jahr/',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Decline URLs that TVNowIE or TVNowListIE accept; otherwise use the default check."""
        if TVNowIE.suitable(url) or TVNowListIE.suitable(url):
            return False
        return super(TVNowShowIE, cls).suitable(url)

    def _real_extract(self, url):
        """Return a playlist that links to each season list of the show."""
        mobj = re.match(self._VALID_URL, url)
        base_url, show_id = mobj.group('base_url', 'show_id')

        list_info = self._extract_list_info(show_id, show_id)

        entries = []
        for tab in list_info['formatTabs']['items']:
            season_slug = tab.get('seoheadline')
            if not season_slug:
                # The season list URL is built from this slug; skip tabs
                # that do not provide one.
                continue
            tab_id = tab.get('id')
            entry_id = compat_str(tab_id) if tab_id else None
            entries.append(self.url_result(
                '%s/list/%s' % (base_url, season_slug), TVNowListIE.ie_key(),
                entry_id, tab.get('headline')))

        return self.playlist_result(entries, show_id, list_info.get('title'))