# coding: utf-8
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..compat import compat_urllib_parse_urlparse
from ..utils import (
    determine_ext,
    float_or_none,
)


class SpiegeltvIE(InfoExtractor):
    """Extractor for film pages on spiegel.tv.

    Matches both ``/filme/<slug>`` URLs and the hash-routed
    ``/#/filme/<slug>`` variant; the slug is used as the video id.
    """

    _VALID_URL = r'https?://(?:www\.)?spiegel\.tv/(?:#/)?filme/(?P<id>[\-a-z0-9]+)'
    _TESTS = [{
        'url': 'http://www.spiegel.tv/filme/flug-mh370/',
        'info_dict': {
            'id': 'flug-mh370',
            'ext': 'm4v',
            'title': 'Flug MH370',
            'description': 'Das Rätsel um die Boeing 777 der Malaysia-Airlines',
            # Raw literal: '\.' is not a valid escape in a plain string.
            'thumbnail': r're:http://.*\.jpg$',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        }
    }, {
        'url': 'http://www.spiegel.tv/#/filme/alleskino-die-wahrheit-ueber-maenner/',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for a spiegel.tv film page.

        The page itself only supplies the title. Everything else comes from
        a chain of JSON documents: API version -> slug (object id) -> media
        metadata (uuid, aspect flag, images, subtitle, duration), plus the
        project config listing the streaming server endpoints.

        Returns a dict with ``id``, ``title``, ``description``, ``duration``,
        ``thumbnails`` and ``formats``.
        """
        # Hash-routed URLs are rewritten to the plain form before fetching;
        # replace() is a no-op when '/#/' is absent.
        url = url.replace('/#/', '/')
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        title = self._html_search_regex(r'<h1.*?>(.*?)</h1>', webpage, 'title')

        apihost = 'http://spiegeltv-ivms2-restapi.s3.amazonaws.com'
        version_json = self._download_json(
            '%s/version.json' % apihost, video_id,
            note='Downloading version information')
        version_name = version_json['version_name']

        slug_json = self._download_json(
            '%s/%s/restapi/slugs/%s.json' % (apihost, version_name, video_id),
            video_id,
            note='Downloading object information')
        oid = slug_json['object_id']

        media_json = self._download_json(
            '%s/%s/restapi/media/%s.json' % (apihost, version_name, oid),
            video_id, note='Downloading media information')
        uuid = media_json['uuid']
        is_wide = media_json['is_wide']

        server_json = self._download_json(
            'http://spiegeltv-prod-static.s3.amazonaws.com/projectConfigs/projectConfig.json',
            video_id, note='Downloading server information')

        aspect = '16x9' if is_wide else '4x3'
        # Identical for every endpoint, so it is built once outside the loop.
        play_path = 'mp4:%s_spiegeltv_0500_%s.m4v' % (uuid, aspect)

        formats = []
        for streamingserver in server_json['streamingserver']:
            endpoint = streamingserver.get('endpoint')
            if not endpoint:
                continue
            if endpoint.startswith('rtmp'):
                formats.append({
                    'url': endpoint,
                    'format_id': 'rtmp',
                    # The RTMP application name is the endpoint path without
                    # its leading slash.
                    'app': compat_urllib_parse_urlparse(endpoint).path[1:],
                    'play_path': play_path,
                    'player_path': 'http://prod-static.spiegel.tv/frontend-076.swf',
                    'ext': 'flv',
                    'rtmp_live': True,
                })
            elif determine_ext(endpoint) == 'm3u8':
                m3u8_formats = self._extract_m3u8_formats(
                    endpoint.replace('[video]', play_path),
                    video_id, 'm4v',
                    preference=1,  # Prefer hls since it allows to workaround georestriction
                    m3u8_id='hls', fatal=False)
                # fatal=False means a failed download yields a falsy value
                # rather than raising; only extend with a real result.
                if m3u8_formats:
                    formats.extend(m3u8_formats)
            else:
                formats.append({
                    'url': endpoint,
                })

        # Thumbnails and description are optional metadata: a missing key
        # must not abort the extraction.
        thumbnails = [{
            'url': image['url'],
            'width': image['width'],
            'height': image['height'],
        } for image in media_json.get('images') or []]

        description = media_json.get('subtitle')
        # The API reports milliseconds; scale=1000 converts to seconds.
        duration = float_or_none(media_json.get('duration_in_ms'), scale=1000)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'duration': duration,
            'thumbnails': thumbnails,
            'formats': formats,
        }