[youtube] Skip unsupported adaptive stream type (#18804)
[youtube-dl] / youtube_dl / extractor / tvnow.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import re
5
6 from .common import InfoExtractor
7 from ..compat import compat_str
8 from ..utils import (
9     ExtractorError,
10     int_or_none,
11     parse_iso8601,
12     parse_duration,
13     str_or_none,
14     update_url_query,
15     urljoin,
16 )
17
18
class TVNowBaseIE(InfoExtractor):
    """Shared helpers for extractors backed by the api.tvnow.de v3 API."""

    # Field names requested from the API; subclasses send them comma-joined
    # as the ``fields`` query parameter.
    _VIDEO_FIELDS = (
        'id', 'title', 'free', 'geoblocked', 'articleLong', 'articleShort',
        'broadcastStartDate', 'isDrm', 'duration', 'season', 'episode',
        'manifest.dashclear', 'manifest.hlsclear', 'manifest.smoothclear',
        'format.title', 'format.defaultImage169Format', 'format.defaultImage169Logo')

    def _call_api(self, path, video_id, query):
        """Download and return the JSON document at api.tvnow.de/v3/<path>."""
        return self._download_json(
            'https://api.tvnow.de/v3/' + path, video_id, query=query)

    @staticmethod
    def _rewrite_manifest_url(manifest_url, proto, suffix):
        """Return manifest_url rewritten for another streaming protocol.

        The manifest file name following ``.ism/`` is replaced with *suffix*
        and every ``hls``/``dash``/``hss`` marker that is followed by ``.``
        or ``-`` is replaced with *proto*.
        """
        return re.sub(
            r'(?:hls|dash|hss)([.-])', proto + r'\1', re.sub(
                r'\.ism/(?:[^.]*\.(?:m3u8|mpd)|[Mm]anifest)',
                '.ism/' + suffix, manifest_url))

    def _extract_video(self, info, display_id):
        """Build the info dict for a single video from its API metadata.

        info is the API response for one video (must contain 'id' and
        'title'); display_id is passed through to the result unchanged.

        Raises ExtractorError when no formats were found and the video is
        flagged as DRM protected or as not free; reports a geo restriction
        when it is flagged as geoblocked.
        """
        video_id = compat_str(info['id'])
        title = info['title']

        # Must exist even if the loop below never assigns it (no manifests,
        # or only empty/duplicate ones); otherwise the _sort_formats() call
        # fails with UnboundLocalError instead of its regular handling.
        formats = []
        paths = []
        for manifest_url in (info.get('manifest') or {}).values():
            if not manifest_url:
                continue
            manifest_url = update_url_query(manifest_url, {'filter': ''})
            path = self._search_regex(r'https?://[^/]+/(.+?)\.ism/', manifest_url, 'path')
            # Several manifest entries may point at the same .ism path;
            # probe each distinct path only once.
            if path in paths:
                continue
            paths.append(path)

            formats = self._extract_mpd_formats(
                self._rewrite_manifest_url(manifest_url, 'dash', '.mpd'),
                video_id, mpd_id='dash', fatal=False)
            formats.extend(self._extract_ism_formats(
                self._rewrite_manifest_url(manifest_url, 'hss', 'Manifest'),
                video_id, ism_id='mss', fatal=False))
            formats.extend(self._extract_m3u8_formats(
                self._rewrite_manifest_url(manifest_url, 'hls', '.m3u8'),
                video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
            # The first path that yields any format is good enough
            if formats:
                break
        else:
            # No manifest produced formats: report the most likely reason
            if info.get('isDrm'):
                raise ExtractorError(
                    'Video %s is DRM protected' % video_id, expected=True)
            if info.get('geoblocked'):
                raise self.raise_geo_restricted()
            if not info.get('free', True):
                raise ExtractorError(
                    'Video %s is not available for free' % video_id, expected=True)
        self._sort_formats(formats)

        description = info.get('articleLong') or info.get('articleShort')
        timestamp = parse_iso8601(info.get('broadcastStartDate'), ' ')
        duration = parse_duration(info.get('duration'))

        # 'format' may be present but null in the response; ``or {}`` also
        # covers that case, which a .get() default alone would not.
        f = info.get('format') or {}

        thumbnails = [{
            'url': 'https://aistvnow-a.akamaihd.net/tvnow/movie/%s' % video_id,
        }]
        thumbnail = f.get('defaultImage169Format') or f.get('defaultImage169Logo')
        if thumbnail:
            thumbnails.append({
                'url': thumbnail,
            })

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'description': description,
            'thumbnails': thumbnails,
            'timestamp': timestamp,
            'duration': duration,
            'series': f.get('title'),
            'season_number': int_or_none(info.get('season')),
            'episode_number': int_or_none(info.get('episode')),
            'episode': title,
            'formats': formats,
        }
101
102
class TVNowIE(TVNowBaseIE):
    """Extractor for old-style tvnow URLs of the form station/show/episode."""

    _VALID_URL = r'''(?x)
                    https?://
                        (?:www\.)?tvnow\.(?:de|at|ch)/(?P<station>[^/]+)/
                        (?P<show_id>[^/]+)/
                        (?!(?:list|jahr)(?:/|$))(?P<id>[^/?\#&]+)
                    '''

    @classmethod
    def suitable(cls, url):
        """Accept url only if none of the more specific extractors claims it."""
        more_specific = (TVNowNewIE, TVNowSeasonIE, TVNowAnnualIE, TVNowShowIE)
        if any(ie.suitable(url) for ie in more_specific):
            return False
        return super(TVNowIE, cls).suitable(url)

    _TESTS = [{
        'url': 'https://www.tvnow.de/rtl2/grip-das-motormagazin/der-neue-porsche-911-gt-3/player',
        'info_dict': {
            'id': '331082',
            'display_id': 'grip-das-motormagazin/der-neue-porsche-911-gt-3',
            'ext': 'mp4',
            'title': 'Der neue Porsche 911 GT 3',
            'description': 'md5:6143220c661f9b0aae73b245e5d898bb',
            'timestamp': 1495994400,
            'upload_date': '20170528',
            'duration': 5283,
            'series': 'GRIP - Das Motormagazin',
            'season_number': 14,
            'episode_number': 405,
            'episode': 'Der neue Porsche 911 GT 3',
        },
    }, {
        # rtl2
        'url': 'https://www.tvnow.de/rtl2/armes-deutschland/episode-0008/player',
        'only_matching': True,
    }, {
        # rtlnitro
        'url': 'https://www.tvnow.de/nitro/alarm-fuer-cobra-11-die-autobahnpolizei/auf-eigene-faust-pilot/player',
        'only_matching': True,
    }, {
        # superrtl
        'url': 'https://www.tvnow.de/superrtl/die-lustigsten-schlamassel-der-welt/u-a-ketchup-effekt/player',
        'only_matching': True,
    }, {
        # ntv
        'url': 'https://www.tvnow.de/ntv/startup-news/goetter-in-weiss/player',
        'only_matching': True,
    }, {
        # vox
        'url': 'https://www.tvnow.de/vox/auto-mobil/neues-vom-automobilmarkt-2017-11-19-17-00-00/player',
        'only_matching': True,
    }, {
        # rtlplus
        'url': 'https://www.tvnow.de/rtlplus/op-ruft-dr-bruckner/die-vernaehte-frau/player',
        'only_matching': True,
    }, {
        'url': 'https://www.tvnow.de/rtl2/grip-das-motormagazin/der-neue-porsche-911-gt-3',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Fetch the episode's metadata by its show/episode slug and extract it."""
        match = re.match(self._VALID_URL, url)
        show_slug, episode_slug = match.group('show_id', 'id')
        # The station part of the URL is not needed for the API lookup
        display_id = '%s/%s' % (show_slug, episode_slug)

        api_query = {
            'fields': ','.join(self._VIDEO_FIELDS),
        }
        info = self._call_api(
            'movies/' + display_id, display_id, query=api_query)

        return self._extract_video(info, display_id)
171
172
class TVNowNewIE(InfoExtractor):
    """Extractor for new-style tvnow episode URLs under /shows/ or /serien/.

    It does no extraction itself: the URL is rewritten to the old
    station/show/episode layout and handed over to TVNowIE.
    """

    # The episode slug stops at '/', '?', '#' or '&' so that a query string
    # or fragment can never be mistaken for part of the slug or the
    # trailing numeric id (same delimiter set as TVNowIE uses).
    _VALID_URL = r'''(?x)
                    (?P<base_url>https?://
                        (?:www\.)?tvnow\.(?:de|at|ch)/
                        (?:shows|serien))/
                        (?P<show>[^/]+)-\d+/
                        [^/]+/
                        episode-\d+-(?P<episode>[^/?\#&]+)-(?P<id>\d+)
                    '''

    _TESTS = [{
        'url': 'https://www.tvnow.de/shows/grip-das-motormagazin-1669/2017-05/episode-405-der-neue-porsche-911-gt-3-331082',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a url_result pointing TVNowIE at the equivalent old-style URL."""
        mobj = re.match(self._VALID_URL, url)
        # '_' stands in for the station path segment of old-style URLs
        base_url = re.sub(r'(?:shows|serien)', '_', mobj.group('base_url'))
        show, episode = mobj.group('show', 'episode')
        return self.url_result(
            # Rewrite new URLs to the old format and use extraction via old API
            # at api.tvnow.de as a loophole for bypassing premium content checks
            '%s/%s/%s' % (base_url, show, episode),
            ie=TVNowIE.ie_key(), video_id=mobj.group('id'))
197
198
class TVNowNewBaseIE(InfoExtractor):
    """Shared helper for extractors backed by the apigw.tvnow.de API."""

    def _call_api(self, path, video_id, query=None):
        """Download JSON from apigw.tvnow.de/module/<path> and return it.

        query is an optional dict of URL query parameters; omitting it is
        equivalent to passing an empty dict.

        Raises ExtractorError (expected) when the response carries a truthy
        'error' value.
        """
        # A fresh dict per call instead of a shared mutable default argument
        result = self._download_json(
            'https://apigw.tvnow.de/module/' + path, video_id,
            query={} if query is None else query)
        error = result.get('error')
        if error:
            raise ExtractorError(
                '%s said: %s' % (self.IE_NAME, error), expected=True)
        return result
208
209
210 """
211 TODO: new apigw.tvnow.de based version of TVNowIE. Replace old TVNowIE with it
212 when api.tvnow.de is shut down. This version can't bypass premium checks though.
213 class TVNowIE(TVNowNewBaseIE):
214     _VALID_URL = r'''(?x)
215                     https?://
216                         (?:www\.)?tvnow\.(?:de|at|ch)/
217                         (?:shows|serien)/[^/]+/
218                         (?:[^/]+/)+
219                         (?P<display_id>[^/?$&]+)-(?P<id>\d+)
220                     '''
221
222     _TESTS = [{
223         # episode with annual navigation
224         'url': 'https://www.tvnow.de/shows/grip-das-motormagazin-1669/2017-05/episode-405-der-neue-porsche-911-gt-3-331082',
225         'info_dict': {
226             'id': '331082',
227             'display_id': 'grip-das-motormagazin/der-neue-porsche-911-gt-3',
228             'ext': 'mp4',
229             'title': 'Der neue Porsche 911 GT 3',
230             'description': 'md5:6143220c661f9b0aae73b245e5d898bb',
231             'thumbnail': r're:^https?://.*\.jpg$',
232             'timestamp': 1495994400,
233             'upload_date': '20170528',
234             'duration': 5283,
235             'series': 'GRIP - Das Motormagazin',
236             'season_number': 14,
237             'episode_number': 405,
238             'episode': 'Der neue Porsche 911 GT 3',
239         },
240     }, {
241         # rtl2, episode with season navigation
242         'url': 'https://www.tvnow.de/shows/armes-deutschland-11471/staffel-3/episode-14-bernd-steht-seit-der-trennung-von-seiner-frau-allein-da-526124',
243         'only_matching': True,
244     }, {
245         # rtlnitro
246         'url': 'https://www.tvnow.de/serien/alarm-fuer-cobra-11-die-autobahnpolizei-1815/staffel-13/episode-5-auf-eigene-faust-pilot-366822',
247         'only_matching': True,
248     }, {
249         # superrtl
250         'url': 'https://www.tvnow.de/shows/die-lustigsten-schlamassel-der-welt-1221/staffel-2/episode-14-u-a-ketchup-effekt-364120',
251         'only_matching': True,
252     }, {
253         # ntv
254         'url': 'https://www.tvnow.de/shows/startup-news-10674/staffel-2/episode-39-goetter-in-weiss-387630',
255         'only_matching': True,
256     }, {
257         # vox
258         'url': 'https://www.tvnow.de/shows/auto-mobil-174/2017-11/episode-46-neues-vom-automobilmarkt-2017-11-19-17-00-00-380072',
259         'only_matching': True,
260     }, {
261         'url': 'https://www.tvnow.de/shows/grip-das-motormagazin-1669/2017-05/episode-405-der-neue-porsche-911-gt-3-331082',
262         'only_matching': True,
263     }]
264
265     def _extract_video(self, info, url, display_id):
266         config = info['config']
267         source = config['source']
268
269         video_id = compat_str(info.get('id') or source['videoId'])
270         title = source['title'].strip()
271
272         paths = []
273         for manifest_url in (info.get('manifest') or {}).values():
274             if not manifest_url:
275                 continue
276             manifest_url = update_url_query(manifest_url, {'filter': ''})
277             path = self._search_regex(r'https?://[^/]+/(.+?)\.ism/', manifest_url, 'path')
278             if path in paths:
279                 continue
280             paths.append(path)
281
282             def url_repl(proto, suffix):
283                 return re.sub(
284                     r'(?:hls|dash|hss)([.-])', proto + r'\1', re.sub(
285                         r'\.ism/(?:[^.]*\.(?:m3u8|mpd)|[Mm]anifest)',
286                         '.ism/' + suffix, manifest_url))
287
288             formats = self._extract_mpd_formats(
289                 url_repl('dash', '.mpd'), video_id,
290                 mpd_id='dash', fatal=False)
291             formats.extend(self._extract_ism_formats(
292                 url_repl('hss', 'Manifest'),
293                 video_id, ism_id='mss', fatal=False))
294             formats.extend(self._extract_m3u8_formats(
295                 url_repl('hls', '.m3u8'), video_id, 'mp4',
296                 'm3u8_native', m3u8_id='hls', fatal=False))
297             if formats:
298                 break
299         else:
300             if try_get(info, lambda x: x['rights']['isDrm']):
301                 raise ExtractorError(
302                     'Video %s is DRM protected' % video_id, expected=True)
303             if try_get(config, lambda x: x['boards']['geoBlocking']['block']):
304                 raise self.raise_geo_restricted()
305             if not info.get('free', True):
306                 raise ExtractorError(
307                     'Video %s is not available for free' % video_id, expected=True)
308         self._sort_formats(formats)
309
310         description = source.get('description')
311         thumbnail = url_or_none(source.get('poster'))
312         timestamp = unified_timestamp(source.get('previewStart'))
313         duration = parse_duration(source.get('length'))
314
315         series = source.get('format')
316         season_number = int_or_none(self._search_regex(
317             r'staffel-(\d+)', url, 'season number', default=None))
318         episode_number = int_or_none(self._search_regex(
319             r'episode-(\d+)', url, 'episode number', default=None))
320
321         return {
322             'id': video_id,
323             'display_id': display_id,
324             'title': title,
325             'description': description,
326             'thumbnail': thumbnail,
327             'timestamp': timestamp,
328             'duration': duration,
329             'series': series,
330             'season_number': season_number,
331             'episode_number': episode_number,
332             'episode': title,
333             'formats': formats,
334         }
335
336     def _real_extract(self, url):
337         display_id, video_id = re.match(self._VALID_URL, url).groups()
338         info = self._call_api('player/' + video_id, video_id)
339         return self._extract_video(info, video_id, display_id)
340 """
341
342
class TVNowListBaseIE(TVNowNewBaseIE):
    """Base class for playlist extractors (show, season, month listings)."""

    # Matches a show's landing URL; subclasses append their own suffix
    _SHOW_VALID_URL = r'''(?x)
                    (?P<base_url>
                        https?://
                            (?:www\.)?tvnow\.(?:de|at|ch)/(?:shows|serien)/
                            [^/?#&]+-(?P<show_id>\d+)
                    )
                    '''

    @classmethod
    def suitable(cls, url):
        """Reject URLs that TVNowNewIE accepts; otherwise use the default check."""
        if TVNowNewIE.suitable(url):
            return False
        return super(TVNowListBaseIE, cls).suitable(url)

    def _extract_items(self, url, show_id, list_id, query):
        """Return a playlist of the episodes the API lists for show_id.

        query selects the listing (the subclasses pass a season or a
        year/month pair); list_id is used for reporting and becomes the
        second half of the playlist id '<show_id>/<list_id>'.
        """
        response = self._call_api(
            'teaserrow/format/episode/' + show_id, list_id,
            query=query)

        entries = []
        for item in response['items']:
            if not isinstance(item, dict):
                continue
            # Item URLs are resolved against the page URL
            episode_url = urljoin(url, item.get('url'))
            if not episode_url:
                continue
            entries.append(self.url_result(
                episode_url, ie=TVNowNewIE.ie_key(),
                video_id=str_or_none(item.get('id') or item.get('videoId')),
                video_title=item.get('subheadline') or item.get('text')))

        playlist_id = '%s/%s' % (show_id, list_id)
        return self.playlist_result(entries, playlist_id)
374
375         return self.playlist_result(entries, '%s/%s' % (show_id, list_id))
376
377
class TVNowSeasonIE(TVNowListBaseIE):
    """Playlist extractor for one season of a show (.../staffel-<n>)."""

    _VALID_URL = r'%s/staffel-(?P<id>\d+)' % TVNowListBaseIE._SHOW_VALID_URL
    _TESTS = [{
        'url': 'https://www.tvnow.de/serien/alarm-fuer-cobra-11-die-autobahnpolizei-1815/staffel-13',
        'info_dict': {
            'id': '1815/13',
        },
        'playlist_mincount': 22,
    }]

    def _real_extract(self, url):
        """List the episodes of the season named in the URL."""
        match = re.match(self._VALID_URL, url)
        show_id, season_id = match.group('show_id', 'id')
        query = {'season': season_id}
        return self._extract_items(url, show_id, season_id, query)
392
393
class TVNowAnnualIE(TVNowListBaseIE):
    """Playlist extractor for one month of a show (.../<yyyy>-<mm>)."""

    _VALID_URL = r'%s/(?P<year>\d{4})-(?P<month>\d{2})' % TVNowListBaseIE._SHOW_VALID_URL
    _TESTS = [{
        'url': 'https://www.tvnow.de/shows/grip-das-motormagazin-1669/2017-05',
        'info_dict': {
            'id': '1669/2017-05',
        },
        'playlist_mincount': 2,
    }]

    def _real_extract(self, url):
        """List the episodes of the year/month named in the URL."""
        match = re.match(self._VALID_URL, url)
        show_id, year, month = match.group('show_id', 'year', 'month')
        # The list id keeps the zero-padded text form; the API query gets ints
        list_id = '%s-%s' % (year, month)
        query = {
            'year': int(year),
            'month': int(month),
        }
        return self._extract_items(url, show_id, list_id, query)
411
412
class TVNowShowIE(TVNowListBaseIE):
    """Playlist extractor for a whole show; yields season or month playlists."""

    _VALID_URL = TVNowListBaseIE._SHOW_VALID_URL
    _TESTS = [{
        # annual navigationType
        'url': 'https://www.tvnow.de/shows/grip-das-motormagazin-1669',
        'info_dict': {
            'id': '1669',
        },
        'playlist_mincount': 73,
    }, {
        # season navigationType
        'url': 'https://www.tvnow.de/shows/armes-deutschland-11471',
        'info_dict': {
            'id': '11471',
        },
        'playlist_mincount': 3,
    }]

    @classmethod
    def suitable(cls, url):
        """Accept url only if no episode, season or month extractor claims it."""
        more_specific = (TVNowNewIE, TVNowSeasonIE, TVNowAnnualIE)
        if any(ie.suitable(url) for ie in more_specific):
            return False
        return super(TVNowShowIE, cls).suitable(url)

    def _real_extract(self, url):
        """Turn the show's navigation data into a playlist of sub-playlists.

        Raises ExtractorError when the API reports a navigationType other
        than 'annual' or 'season'.
        """
        match = re.match(self._VALID_URL, url)
        base_url, show_id = match.group('base_url', 'show_id')

        navigation_data = self._call_api(
            'teaserrow/format/navigation/' + show_id, show_id)
        items = navigation_data['items']
        navigation = navigation_data.get('navigationType')

        if navigation not in ('annual', 'season'):
            raise ExtractorError('Unknown navigationType')

        entries = []
        for item in items:
            if not isinstance(item, dict):
                continue

            if navigation == 'season':
                season_number = int_or_none(item.get('season'))
                if season_number is not None:
                    entries.append(self.url_result(
                        '%s/staffel-%d' % (base_url, season_number),
                        ie=TVNowSeasonIE.ie_key()))
                continue

            # Annual navigation: one entry per listed month of each year
            year = int_or_none(item.get('year'))
            months = item.get('months')
            if year is None or not isinstance(months, list):
                continue
            for month_dict in months:
                if not (isinstance(month_dict, dict) and month_dict):
                    continue
                # The month number is read from the first key of the dict
                month_number = int_or_none(next(iter(month_dict)))
                if month_number is not None:
                    entries.append(self.url_result(
                        '%s/%04d-%02d' % (base_url, year, month_number),
                        ie=TVNowAnnualIE.ie_key()))

        return self.playlist_result(entries, show_id)
477
478         return self.playlist_result(entries, show_id)