034bd47ff617bdc96d572b7065b3af03c7117468
[youtube-dl] / youtube_dl / extractor / spiegeltv.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 from .common import InfoExtractor
5 from ..compat import compat_urllib_parse_urlparse
6 from ..utils import (
7     determine_ext,
8     float_or_none,
9 )
10
11
class SpiegeltvIE(InfoExtractor):
    """Extractor for films published under spiegel.tv/filme/<slug>.

    The film slug from the URL is resolved through a chain of JSON documents
    (API version -> slug record -> media record), and one format entry is
    built for every streaming server endpoint listed in the project config.
    """

    _VALID_URL = r'https?://(?:www\.)?spiegel\.tv/(?:#/)?filme/(?P<id>[\-a-z0-9]+)'
    _TESTS = [{
        'url': 'http://www.spiegel.tv/filme/flug-mh370/',
        'info_dict': {
            'id': 'flug-mh370',
            'ext': 'm4v',
            'title': 'Flug MH370',
            'description': 'Das Rätsel um die Boeing 777 der Malaysia-Airlines',
            # Raw string: '\.' is not a valid escape in a normal literal.
            'thumbnail': r're:http://.*\.jpg$',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        }
    }, {
        'url': 'http://www.spiegel.tv/#/filme/alleskino-die-wahrheit-ueber-maenner/',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for a single spiegel.tv film page.

        Args:
            url: A URL matching ``_VALID_URL``; the ``/#/filme/...`` variant
                is rewritten to ``/filme/...`` before the page is fetched.

        Returns:
            A dict with the keys ``id``, ``title``, ``description``,
            ``duration``, ``thumbnails`` and ``formats``.

        Raises:
            KeyError: If a required field (``version_name``, ``object_id``,
                ``uuid``, ``is_wide`` or ``streamingserver``) is missing from
                the corresponding JSON document. Optional metadata
                (``subtitle``, ``images``, ``duration_in_ms``) may be absent.
        """
        # str.replace is a no-op when the hash-bang segment is absent, so no
        # membership test is needed first.
        url = url.replace('/#/', '/')
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        title = self._html_search_regex(r'<h1.*?>(.*?)</h1>', webpage, 'title')

        # The REST API paths are namespaced by a version name that has to be
        # looked up first.
        apihost = 'http://spiegeltv-ivms2-restapi.s3.amazonaws.com'
        version_json = self._download_json(
            '%s/version.json' % apihost, video_id,
            note='Downloading version information')
        version_name = version_json['version_name']

        # slug -> numeric/object id
        slug_json = self._download_json(
            '%s/%s/restapi/slugs/%s.json' % (apihost, version_name, video_id),
            video_id,
            note='Downloading object information')
        oid = slug_json['object_id']

        # object id -> media record (uuid, aspect ratio, images, ...)
        media_json = self._download_json(
            '%s/%s/restapi/media/%s.json' % (apihost, version_name, oid),
            video_id, note='Downloading media information')
        uuid = media_json['uuid']
        is_wide = media_json['is_wide']

        server_json = self._download_json(
            'http://spiegeltv-prod-static.s3.amazonaws.com/projectConfigs/projectConfig.json',
            video_id, note='Downloading server information')

        # The play path depends only on the media record, so build it once
        # instead of once per streaming server.
        aspect_ratio = '16x9' if is_wide else '4x3'
        play_path = 'mp4:%s_spiegeltv_0500_%s.m4v' % (uuid, aspect_ratio)

        formats = []
        for streamingserver in server_json['streamingserver']:
            endpoint = streamingserver.get('endpoint')
            if not endpoint:
                continue
            if endpoint.startswith('rtmp'):
                formats.append({
                    'url': endpoint,
                    'format_id': 'rtmp',
                    # RTMP app name is the endpoint path without the leading '/'
                    'app': compat_urllib_parse_urlparse(endpoint).path[1:],
                    'play_path': play_path,
                    'player_path': 'http://prod-static.spiegel.tv/frontend-076.swf',
                    'ext': 'flv',
                    'rtmp_live': True,
                })
            elif determine_ext(endpoint) == 'm3u8':
                formats.append({
                    # The endpoint is a template with a '[video]' placeholder
                    'url': endpoint.replace('[video]', play_path),
                    'ext': 'm4v',
                    'format_id': 'hls',  # Prefer hls since it allows to workaround georestriction
                    'protocol': 'm3u8',
                    'preference': 1,
                    'http_headers': {
                        'Accept-Encoding': 'deflate',  # gzip causes trouble on the server side
                    },
                })
            else:
                # Unknown endpoint type: hand the bare URL over unchanged
                formats.append({
                    'url': endpoint,
                })
        # Delegates format validation to the base class helper.
        self._check_formats(formats, video_id)

        # Thumbnails are optional metadata: tolerate a missing/empty image
        # list and skip entries without a URL rather than failing extraction.
        thumbnails = []
        for image in media_json.get('images') or []:
            image_url = image.get('url')
            if not image_url:
                continue
            thumbnails.append({
                'url': image_url,
                'width': image.get('width'),
                'height': image.get('height'),
            })

        # Optional as well; a missing subtitle yields a None description.
        description = media_json.get('subtitle')
        # 'duration_in_ms' divided by 1000 (presumably milliseconds -> seconds)
        duration = float_or_none(media_json.get('duration_in_ms'), scale=1000)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'duration': duration,
            'thumbnails': thumbnails,
            'formats': formats,
        }