[tvnow] Add support for shows
[youtube-dl] / youtube_dl / extractor / tvnow.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import re
5
6 from .common import InfoExtractor
7 from ..compat import compat_str
8 from ..utils import (
9     ExtractorError,
10     int_or_none,
11     parse_iso8601,
12     parse_duration,
13     update_url_query,
14 )
15
16
class TVNowBaseIE(InfoExtractor):
    """Shared API access and single-video extraction for the TVNow extractors."""

    # Fields requested from the API for a single movie. Dotted names select
    # attributes of nested objects (e.g. ``format.title``).
    _VIDEO_FIELDS = (
        'id', 'title', 'free', 'geoblocked', 'articleLong', 'articleShort',
        'broadcastStartDate', 'isDrm', 'duration', 'season', 'episode',
        'manifest.dashclear', 'format.title', 'format.defaultImage169Format',
        'format.defaultImage169Logo', 'replaceMovieInformation')

    def _call_api(self, path, video_id, query):
        """Download and return the JSON document at ``path`` of the v3 API.

        ``video_id`` is handed to ``_download_json`` unchanged and ``query``
        is sent as the URL query.
        """
        return self._download_json(
            'https://api.tvnow.de/v3/' + path,
            video_id, query=query)

    def _extract_video(self, info, display_id):
        """Turn the API description ``info`` of one movie into an info dict.

        ``info`` must contain ``id`` and ``title``; every other field is
        optional. ``display_id`` is copied into the result unchanged.

        Raises ExtractorError when the movie has no clear DASH manifest URL,
        naming DRM, geo restriction or missing free access as the reason when
        the API flags one of them.
        """
        video_id = compat_str(info['id'])
        title = info['title']

        # Tolerate a missing or null ``manifest`` object so that the
        # diagnostics below can run instead of a bare KeyError/TypeError.
        mpd_url = (info.get('manifest') or {}).get('dashclear')
        if not mpd_url:
            if info.get('isDrm'):
                raise ExtractorError(
                    'Video %s is DRM protected' % video_id, expected=True)
            if info.get('geoblocked'):
                raise ExtractorError(
                    'Video %s is not available from your location due to geo restriction' % video_id,
                    expected=True)
            if not info.get('free', True):
                raise ExtractorError(
                    'Video %s is not available for free' % video_id, expected=True)
            # No known reason applies; fail explicitly rather than passing
            # an empty URL on to the manifest helpers below.
            raise ExtractorError(
                'Unable to find a manifest URL for video %s' % video_id)

        # NOTE(review): an empty ``filter`` parameter presumably disables
        # server-side filtering of the manifest - confirm against the service.
        mpd_url = update_url_query(mpd_url, {'filter': ''})
        formats = self._extract_mpd_formats(mpd_url, video_id, mpd_id='dash', fatal=False)
        # The Smooth Streaming and HLS manifest URLs are derived from the
        # DASH one by swapping the host prefix and the manifest file name.
        formats.extend(self._extract_ism_formats(
            mpd_url.replace('dash.', 'hss.').replace('/.mpd', '/Manifest'),
            video_id, ism_id='mss', fatal=False))
        formats.extend(self._extract_m3u8_formats(
            mpd_url.replace('dash.', 'hls.').replace('/.mpd', '/.m3u8'),
            video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
        self._sort_formats(formats)

        description = info.get('articleLong') or info.get('articleShort')
        timestamp = parse_iso8601(info.get('broadcastStartDate'), ' ')
        duration = parse_duration(info.get('duration'))

        # ``format`` may be absent or null; normalise both to an empty dict.
        f = info.get('format') or {}

        # Only build the movie image URL when the API supplies its path
        # component. Formatting first and applying ``or`` afterwards always
        # yields a non-empty string, which would hide the fallbacks.
        movie_image = info.get('replaceMovieInformation')
        if movie_image:
            thumbnail = 'https://aistvnow-a.akamaihd.net/tvnow/movie/%s' % movie_image
        else:
            thumbnail = f.get('defaultImage169Format') or f.get('defaultImage169Logo')

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'timestamp': timestamp,
            'duration': duration,
            'series': f.get('title'),
            'season_number': int_or_none(info.get('season')),
            'episode_number': int_or_none(info.get('episode')),
            'episode': title,
            'formats': formats,
        }
77
78
class TVNowIE(TVNowBaseIE):
    """Extractor for a single TVNow episode (``.../player`` or ``.../preview``)."""

    _VALID_URL = r'https?://(?:www\.)?tvnow\.(?:de|at|ch)/(?:rtl(?:2|plus)?|nitro|superrtl|ntv|vox)/(?P<show_id>[^/]+)/(?:(?:list/[^/]+|jahr/\d{4}/\d{1,2})/)?(?P<id>[^/]+)/(?:player|preview)'

    # ``only_matching`` is a boolean flag, so the entries below use ``True``
    # rather than the string ``'True'``.
    _TESTS = [{
        'url': 'https://www.tvnow.de/rtl2/grip-das-motormagazin/der-neue-porsche-911-gt-3/player',
        'info_dict': {
            'id': '331082',
            'display_id': 'grip-das-motormagazin/der-neue-porsche-911-gt-3',
            'ext': 'mp4',
            'title': 'Der neue Porsche 911 GT 3',
            'description': 'md5:6143220c661f9b0aae73b245e5d898bb',
            'thumbnail': r're:^https?://.*\.jpg$',
            'timestamp': 1495994400,
            'upload_date': '20170528',
            'duration': 5283,
            'series': 'GRIP - Das Motormagazin',
            'season_number': 14,
            'episode_number': 405,
            'episode': 'Der neue Porsche 911 GT 3',
        },
    }, {
        # rtl2
        'url': 'https://www.tvnow.de/rtl2/armes-deutschland/episode-0008/player',
        'only_matching': True,
    }, {
        # rtlnitro
        'url': 'https://www.tvnow.de/nitro/alarm-fuer-cobra-11-die-autobahnpolizei/auf-eigene-faust-pilot/player',
        'only_matching': True,
    }, {
        # superrtl
        'url': 'https://www.tvnow.de/superrtl/die-lustigsten-schlamassel-der-welt/u-a-ketchup-effekt/player',
        'only_matching': True,
    }, {
        # ntv
        'url': 'https://www.tvnow.de/ntv/startup-news/goetter-in-weiss/player',
        'only_matching': True,
    }, {
        # vox
        'url': 'https://www.tvnow.de/vox/auto-mobil/neues-vom-automobilmarkt-2017-11-19-17-00-00/player',
        'only_matching': True,
    }, {
        # rtlplus
        'url': 'https://www.tvnow.de/rtlplus/op-ruft-dr-bruckner/die-vernaehte-frau/player',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Fetch the movie addressed by ``url`` from the API and extract it."""
        mobj = re.match(self._VALID_URL, url)
        # The API addresses a movie as "<show slug>/<episode slug>". Named
        # groups keep this correct if the pattern ever gains another group.
        display_id = '%s/%s' % (mobj.group('show_id'), mobj.group('id'))

        info = self._call_api(
            'movies/' + display_id, display_id, query={
                'fields': ','.join(self._VIDEO_FIELDS),
            })

        return self._extract_video(info, display_id)
134
135
class TVNowListBaseIE(TVNowBaseIE):
    """Helpers shared by the extractors that query the ``formats/seo`` endpoint."""

    def _extend_query(self, show, season, video=None):
        """Return one flat list of API field names.

        ``show`` names are used as given, ``season`` names are prefixed with
        ``formatTabs.`` and, when ``video`` is given and non-empty, its names
        are prefixed with ``formatTabs.formatTabPages.container.movies.``.
        """
        names = list(show)
        names += ['formatTabs.%s' % name for name in season]
        if video:
            names += [
                'formatTabs.formatTabPages.container.movies.%s' % name
                for name in video]
        return names

    def _tvnow_list_info(self, list_id, show_id, fields):
        """Request ``fields`` for the show ``show_id`` from ``formats/seo``.

        ``list_id`` is forwarded to ``_call_api`` as its ``video_id``
        argument; the show is selected by name as ``<show_id>.php``.
        """
        query = {
            'fields': ','.join(fields),
            'name': show_id + '.php',
        }
        return self._call_api('formats/seo', list_id, query=query)
154
155
class TVNowListIE(TVNowListBaseIE):
    """Extractor for one list of a show (``.../<show>/list/<list>``)."""

    _VALID_URL = r'(?P<base_url>https?://(?:www\.)?tvnow\.(?:de|at|ch)/(?:rtl(?:2|plus)?|nitro|superrtl|ntv|vox)/(?P<show_id>[^/]+)/)list/(?P<id>[^?/#&]+)$'

    # Field names requested per level of the ``formats/seo`` response.
    # ``_VIDEO_FIELDS`` deliberately shadows the larger tuple of the base
    # class: only enough is fetched to build the per-episode URLs.
    _SHOW_FIELDS = ('title', )
    _SEASON_FIELDS = ('id', 'headline', 'seoheadline', )
    _VIDEO_FIELDS = ('id', 'headline', 'seoUrl', )

    _TESTS = [{
        'url': 'https://www.tvnow.de/rtl/30-minuten-deutschland/list/aktuell',
        'info_dict': {
            'id': '28296',
            'title': '30 Minuten Deutschland - Aktuell',
        },
        'playlist_mincount': 1,
    }]

    def _real_extract(self, url):
        """Return a playlist with one URL entry per episode of the list.

        Raises ExtractorError when the show has no list whose
        ``seoheadline`` equals the list slug taken from ``url``.
        """
        base_url, show_id, season_id = re.match(self._VALID_URL, url).groups()

        list_info = self._tvnow_list_info(
            season_id, show_id,
            self._extend_query(
                self._SHOW_FIELDS, self._SEASON_FIELDS, self._VIDEO_FIELDS))

        # Give ``next`` a default so that an unknown list slug is reported
        # as an extractor error instead of leaking a bare StopIteration.
        season = next(
            (candidate for candidate in list_info['formatTabs']['items']
             if candidate.get('seoheadline') == season_id),
            None)
        if season is None:
            raise ExtractorError(
                'Unable to find list %s of show %s' % (season_id, show_id))

        title = '%s - %s' % (list_info['title'], season['headline'])

        entries = []
        for container in season['formatTabPages']['items']:
            for info in ((container.get('container') or {}).get('movies') or {}).get('items') or []:
                seo_url = info.get('seoUrl')
                if not seo_url:
                    continue

                # compat_str (not str) keeps non-ASCII slugs working on
                # Python 2; ``or`` also covers an id that is present but null.
                entries.append(self.url_result(
                    base_url + seo_url + '/player', 'TVNow',
                    compat_str(info.get('id') or seo_url)))

        return self.playlist_result(
            entries, compat_str(season.get('id') or season_id), title)
195
196
class TVNowListChannelIE(TVNowListBaseIE):
    """Extractor for a whole show: a playlist of the show's lists."""

    _VALID_URL = r'(?P<base_url>https?://(?:www\.)?tvnow\.(?:de|at|ch)/(?:rtl(?:2|plus)?|nitro|superrtl|ntv|vox)/(?P<show_id>[^/]+))'

    # Field names requested per level of the ``formats/seo`` response.
    _SHOW_FIELDS = ('id', 'title', )
    _SEASON_FIELDS = ('id', 'headline', 'seoheadline', )

    _TESTS = [{
        'url': 'https://www.tvnow.at/vox/ab-ins-beet',
        # Boolean flag, not the string 'True'.
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Match ``url`` only if neither TVNowIE nor TVNowListIE claims it.

        ``_VALID_URL`` has no end anchor, so without this check it would
        also match episode and list URLs.
        """
        if TVNowIE.suitable(url) or TVNowListIE.suitable(url):
            return False
        return super(TVNowListChannelIE, cls).suitable(url)

    def _real_extract(self, url):
        """Return a playlist with one TVNowList URL entry per list of the show."""
        base_url, show_id = re.match(self._VALID_URL, url).groups()

        list_info = self._tvnow_list_info(
            show_id, show_id,
            self._extend_query(self._SHOW_FIELDS, self._SEASON_FIELDS))

        entries = []
        for season_info in list_info['formatTabs']['items']:
            season_url = season_info.get('seoheadline')
            if not season_url:
                continue
            season_id = season_info.get('id')
            entries.append(self.url_result(
                base_url + '/list/' + season_url, 'TVNowList',
                # Avoid passing the literal string 'None' as an id.
                compat_str(season_id) if season_id is not None else None,
                season_info.get('headline')))

        # The show's id and title are requested through _SHOW_FIELDS; use
        # them for the playlist, falling back to the URL slug for the id.
        return self.playlist_result(
            entries, compat_str(list_info.get('id') or show_id),
            list_info.get('title'))
224
225         return self.playlist_result(entries)