[openload] Add required version
[youtube-dl] / youtube_dl / extractor / limelight.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import re
5
6 from .common import InfoExtractor
7 from ..compat import compat_HTTPError
8 from ..utils import (
9     determine_ext,
10     float_or_none,
11     int_or_none,
12     unsmuggle_url,
13     ExtractorError,
14 )
15
16
class LimelightBaseIE(InfoExtractor):
    """Shared helpers for the Limelight media, channel and channel list extractors.

    Subclasses must define ``_PLAYLIST_SERVICE_PATH`` (interpolated into
    ``_PLAYLIST_SERVICE_URL``) and, if they call ``_call_api``, ``_API_PATH``
    (interpolated into ``_API_URL``).
    """

    _PLAYLIST_SERVICE_URL = 'http://production-ps.lvp.llnw.net/r/PlaylistService/%s/%s/%s'
    _API_URL = 'http://api.video.limelight.com/rest/organizations/%s/%s/%s/%s.json'

    def _call_playlist_service(self, item_id, method, fatal=True, referer=None):
        """Download the PlaylistService JSON for ``item_id``.

        :param item_id: id interpolated into the PlaylistService URL.
        :param method: PlaylistService method name (last URL path component).
        :param fatal: forwarded unchanged to ``_download_json``.
        :param referer: optional value sent as the ``Referer`` request header.
        :returns: whatever ``_download_json`` returns.
        :raises ExtractorError: an HTTP 403 response is turned into either a
            geo-restriction error (``CountryDisabled``) or an expected
            ``ExtractorError`` carrying the reported permission string; any
            other ``ExtractorError`` is re-raised untouched.
        """
        headers = {}
        if referer:
            headers['Referer'] = referer
        try:
            return self._download_json(
                self._PLAYLIST_SERVICE_URL % (self._PLAYLIST_SERVICE_PATH, item_id, method),
                item_id, 'Downloading PlaylistService %s JSON' % method, fatal=fatal, headers=headers)
        except ExtractorError as e:
            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
                # The 403 body is JSON describing why access was denied.
                error = self._parse_json(e.cause.read().decode(), item_id)['detail']['contentAccessPermission']
                if error == 'CountryDisabled':
                    self.raise_geo_restricted()
                raise ExtractorError(error, expected=True)
            raise

    def _call_api(self, organization_id, item_id, method):
        """Download the REST API JSON for ``item_id`` within ``organization_id``."""
        return self._download_json(
            self._API_URL % (organization_id, self._API_PATH, item_id, method),
            item_id, 'Downloading API %s JSON' % method)

    def _extract(self, item_id, pc_method, mobile_method, meta_method, referer=None):
        """Fetch the three JSON documents describing ``item_id``.

        :returns: ``(pc, mobile, metadata)``. The ``pc`` playlist is fetched
            first because its ``orgId`` is needed for the metadata API call.
            The mobile playlist is requested with ``fatal=False``.
        """
        pc = self._call_playlist_service(item_id, pc_method, referer=referer)
        metadata = self._call_api(pc['orgId'], item_id, meta_method)
        mobile = self._call_playlist_service(item_id, mobile_method, fatal=False, referer=referer)
        return pc, mobile, metadata

    def _extract_info(self, streams, mobile_urls, properties):
        """Build an info dict for one media item.

        :param streams: iterable of stream dicts (read keys: ``url``,
            ``drmProtected``, ``audioBitRate``, ``videoFrameRate``,
            ``videoWidthInPixels``, ``videoHeightInPixels``, ``videoBitRate``).
        :param mobile_urls: iterable of dicts (read keys: ``mobileUrl``,
            ``targetMediaPlatform``).
        :param properties: metadata dict; ``media_id`` and ``title`` are
            required, every other key is optional.
        :returns: dict with ``id``, ``title``, ``description``, ``formats``,
            ``timestamp``, ``duration``, ``filesize``, ``categories``,
            ``tags``, ``thumbnails`` and ``subtitles``.
        :raises KeyError: if ``media_id`` or ``title`` is missing.
        """
        video_id = properties['media_id']
        formats = []
        # URLs already handled, so the same resource is never listed twice
        # (shared between the streams loop and the mobile URLs loop).
        urls = []

        # (substring looked for in the RTMP host, HTTP host serving the same file)
        CDN_HOSTS = (
            ('delvenetworks.com', 'cpl.delvenetworks.com'),
            ('video.llnw.net', 's2.content.video.llnw.net'),
        )

        for stream in streams:
            stream_url = stream.get('url')
            if not stream_url or stream.get('drmProtected') or stream_url in urls:
                continue
            urls.append(stream_url)
            ext = determine_ext(stream_url)
            if ext == 'f4m':
                formats.extend(self._extract_f4m_formats(
                    stream_url, video_id, f4m_id='hds', fatal=False))
            else:
                fmt = {
                    'url': stream_url,
                    'abr': float_or_none(stream.get('audioBitRate')),
                    'fps': float_or_none(stream.get('videoFrameRate')),
                    'ext': ext,
                }
                width = int_or_none(stream.get('videoWidthInPixels'))
                height = int_or_none(stream.get('videoHeightInPixels'))
                vbr = float_or_none(stream.get('videoBitRate'))
                if width or height or vbr:
                    fmt.update({
                        'width': width,
                        'height': height,
                        'vbr': vbr,
                    })
                else:
                    # No video attributes at all: mark the stream as audio only.
                    fmt['vcodec'] = 'none'
                rtmp = re.search(r'^(?P<url>rtmpe?://(?P<host>[^/]+)/(?P<app>.+))/(?P<playpath>mp[34]:.+)$', stream_url)
                if rtmp:
                    format_id = 'rtmp'
                    if stream.get('videoBitRate'):
                        bitrate = int_or_none(stream['videoBitRate'])
                        # Guard against a value that cannot be converted;
                        # '%d' % None would raise TypeError.
                        if bitrate is not None:
                            format_id += '-%d' % bitrate
                    http_format_id = format_id.replace('rtmp', 'http')

                    for cdn_host, http_host in CDN_HOSTS:
                        if cdn_host not in rtmp.group('host').lower():
                            continue
                        # Drop the 4-character 'mp3:'/'mp4:' prefix of the play path.
                        http_url = 'http://%s/%s' % (http_host, rtmp.group('playpath')[4:])
                        urls.append(http_url)
                        if self._is_valid_url(http_url, video_id, http_format_id):
                            http_fmt = fmt.copy()
                            http_fmt.update({
                                'url': http_url,
                                'format_id': http_format_id,
                            })
                            formats.append(http_fmt)
                            break

                    fmt.update({
                        'url': rtmp.group('url'),
                        'play_path': rtmp.group('playpath'),
                        'app': rtmp.group('app'),
                        'ext': 'flv',
                        'format_id': format_id,
                    })
                formats.append(fmt)

        for mobile_url in mobile_urls:
            media_url = mobile_url.get('mobileUrl')
            format_id = mobile_url.get('targetMediaPlatform')
            if not media_url or format_id in ('Widevine', 'SmoothStreaming') or media_url in urls:
                continue
            urls.append(media_url)
            ext = determine_ext(media_url)
            if ext == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    media_url, video_id, 'mp4', 'm3u8_native',
                    m3u8_id=format_id, fatal=False))
            elif ext == 'f4m':
                # The manifest to fetch is this mobile URL, not a leftover
                # ``stream_url`` from the streams loop above.
                formats.extend(self._extract_f4m_formats(
                    media_url, video_id, f4m_id=format_id, fatal=False))
            else:
                formats.append({
                    'url': media_url,
                    'format_id': format_id,
                    'preference': -1,
                    'ext': ext,
                })

        self._sort_formats(formats)

        title = properties['title']
        description = properties.get('description')
        timestamp = int_or_none(properties.get('publish_date') or properties.get('create_date'))
        # Duration is reported in milliseconds; scale to seconds.
        duration = float_or_none(properties.get('duration_in_milliseconds'), 1000)
        filesize = int_or_none(properties.get('total_storage_in_bytes'))
        # Avoid emitting [None] when the item has no category.
        category = properties.get('category')
        categories = [category] if category else []
        tags = properties.get('tags', [])
        thumbnails = [{
            'url': thumbnail['url'],
            'width': int_or_none(thumbnail.get('width')),
            'height': int_or_none(thumbnail.get('height')),
        } for thumbnail in properties.get('thumbnails', []) if thumbnail.get('url')]

        subtitles = {}
        for caption in properties.get('captions', []):
            lang = caption.get('language_code')
            subtitles_url = caption.get('url')
            if lang and subtitles_url:
                subtitles.setdefault(lang, []).append({
                    'url': subtitles_url,
                })
        closed_captions_url = properties.get('closed_captions_url')
        if closed_captions_url:
            # The closed captions URL carries no language; it is filed under 'en'.
            subtitles.setdefault('en', []).append({
                'url': closed_captions_url,
                'ext': 'ttml',
            })

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'formats': formats,
            'timestamp': timestamp,
            'duration': duration,
            'filesize': filesize,
            'categories': categories,
            'tags': tags,
            'thumbnails': thumbnails,
            'subtitles': subtitles,
        }
178
179
class LimelightMediaIE(LimelightBaseIE):
    """Extractor for a single Limelight media item.

    Accepts ``limelight:media:<id>`` as well as player/link URLs carrying a
    ``mediaId`` query parameter; the id is 32 characters of ``[a-z0-9]``.
    """

    IE_NAME = 'limelight'
    _PLAYLIST_SERVICE_PATH = 'media'
    _API_PATH = 'media'
    _VALID_URL = r'''(?x)
                        (?:
                            limelight:media:|
                            https?://
                                (?:
                                    link\.videoplatform\.limelight\.com/media/|
                                    assets\.delvenetworks\.com/player/loader\.swf
                                )
                                \?.*?\bmediaId=
                        )
                        (?P<id>[a-z0-9]{32})
                    '''
    _TESTS = [{
        'url': 'http://link.videoplatform.limelight.com/media/?mediaId=3ffd040b522b4485b6d84effc750cd86',
        'info_dict': {
            'id': '3ffd040b522b4485b6d84effc750cd86',
            'ext': 'mp4',
            'title': 'HaP and the HB Prince Trailer',
            'description': 'md5:8005b944181778e313d95c1237ddb640',
            'thumbnail': r're:^https?://.*\.jpeg$',
            'duration': 144.23,
            'timestamp': 1244136834,
            'upload_date': '20090604',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        # video with subtitles
        'url': 'limelight:media:a3e00274d4564ec4a9b29b9466432335',
        'md5': '2fa3bad9ac321e23860ca23bc2c69e3d',
        'info_dict': {
            'id': 'a3e00274d4564ec4a9b29b9466432335',
            'ext': 'mp4',
            'title': '3Play Media Overview Video',
            'thumbnail': r're:^https?://.*\.jpeg$',
            'duration': 78.101,
            'timestamp': 1338929955,
            'upload_date': '20120605',
            'subtitles': 'mincount:9',
        },
    }, {
        'url': 'https://assets.delvenetworks.com/player/loader.swf?mediaId=8018a574f08d416e95ceaccae4ba0452',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the info dict for the media item referenced by ``url``.

        Smuggled data may supply ``geo_countries`` (passed to
        ``_initialize_geo_bypass``) and ``source_url`` (used as the referer
        for the PlaylistService requests).
        """
        url, smuggled = unsmuggle_url(url, {})
        media_id = self._match_id(url)
        self._initialize_geo_bypass(smuggled.get('geo_countries'))

        pc, mobile, metadata = self._extract(
            media_id,
            'getPlaylistByMediaId',
            'getMobilePlaylistByMediaId',
            'properties',
            smuggled.get('source_url'))

        # Only the first playlist entry is used for a single media item.
        streams = pc['playlistItems'][0].get('streams', [])
        if mobile:
            mobile_urls = mobile['mediaList'][0].get('mobileUrls', [])
        else:
            # No usable mobile playlist was returned.
            mobile_urls = []

        return self._extract_info(streams, mobile_urls, metadata)
245
246
class LimelightChannelIE(LimelightBaseIE):
    """Extractor for a Limelight channel, returned as a playlist of its media.

    Accepts ``limelight:channel:<id>`` as well as player/link URLs carrying a
    ``channelId`` query parameter; the id is 32 characters of ``[a-z0-9]``.
    """

    IE_NAME = 'limelight:channel'
    _PLAYLIST_SERVICE_PATH = 'channel'
    _API_PATH = 'channels'
    _VALID_URL = r'''(?x)
                        (?:
                            limelight:channel:|
                            https?://
                                (?:
                                    link\.videoplatform\.limelight\.com/media/|
                                    assets\.delvenetworks\.com/player/loader\.swf
                                )
                                \?.*?\bchannelId=
                        )
                        (?P<id>[a-z0-9]{32})
                    '''
    _TESTS = [{
        'url': 'http://link.videoplatform.limelight.com/media/?channelId=ab6a524c379342f9b23642917020c082',
        'info_dict': {
            'id': 'ab6a524c379342f9b23642917020c082',
            'title': 'Javascript Sample Code',
        },
        'playlist_mincount': 3,
    }, {
        'url': 'http://assets.delvenetworks.com/player/loader.swf?channelId=ab6a524c379342f9b23642917020c082',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a playlist result with one entry per item of the channel's media list.

        Entry ``i`` combines ``pc['playlistItems'][i]``,
        ``mobile['mediaList'][i]`` (when a mobile playlist is available) and
        ``medias['media_list'][i]``.
        """
        url, smuggled = unsmuggle_url(url, {})
        channel_id = self._match_id(url)

        pc, mobile, medias = self._extract(
            channel_id,
            'getPlaylistByChannelId',
            'getMobilePlaylistWithNItemsByChannelId?begin=0&count=-1',
            'media',
            smuggled.get('source_url'))

        entries = []
        for index, properties in enumerate(medias['media_list']):
            streams = pc['playlistItems'][index].get('streams', [])
            if mobile:
                mobile_urls = mobile['mediaList'][index].get('mobileUrls', [])
            else:
                mobile_urls = []
            entries.append(self._extract_info(streams, mobile_urls, properties))

        return self.playlist_result(entries, channel_id, pc['title'])
292
293
class LimelightChannelListIE(LimelightBaseIE):
    """Extractor for a Limelight channel list, returned as a playlist of channels.

    Accepts ``limelight:channel_list:<id>`` as well as player/link URLs
    carrying a ``channelListId`` query parameter; the id is 32 characters of
    ``[a-z0-9]``.
    """

    IE_NAME = 'limelight:channel_list'
    _PLAYLIST_SERVICE_PATH = 'channel_list'
    _VALID_URL = r'''(?x)
                        (?:
                            limelight:channel_list:|
                            https?://
                                (?:
                                    link\.videoplatform\.limelight\.com/media/|
                                    assets\.delvenetworks\.com/player/loader\.swf
                                )
                                \?.*?\bchannelListId=
                        )
                        (?P<id>[a-z0-9]{32})
                    '''
    _TESTS = [{
        'url': 'http://link.videoplatform.limelight.com/media/?channelListId=301b117890c4465c8179ede21fd92e2b',
        'info_dict': {
            'id': '301b117890c4465c8179ede21fd92e2b',
            'title': 'Website - Hero Player',
        },
        'playlist_mincount': 2,
    }, {
        'url': 'https://assets.delvenetworks.com/player/loader.swf?channelListId=301b117890c4465c8179ede21fd92e2b',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a playlist whose entries are ``limelight:channel:<id>`` URL results.

        Each entry names ``LimelightChannel`` as the extractor to use, so the
        channels themselves are not downloaded here.
        """
        list_id = self._match_id(url)

        listing = self._call_playlist_service(list_id, 'getMobileChannelListById')

        entries = []
        for channel in listing['channelList']:
            channel_url = 'limelight:channel:%s' % channel['id']
            entries.append(self.url_result(channel_url, 'LimelightChannel'))

        return self.playlist_result(entries, list_id, listing['title'])