X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fspiegeltv.py;h=98cf92d89a1151edfd11b8f15a86eeaa6a83178d;hb=05a976cd99ef2a0eb0b301cd4f98e1aec927968c;hp=e8f49bc52aad1da75bd05811339f426d5970159f;hpb=4ffeca4ea29fe75821c8de5fbaf8d8f585f2dbb4;p=youtube-dl

diff --git a/youtube_dl/extractor/spiegeltv.py b/youtube_dl/extractor/spiegeltv.py
index e8f49bc52..98cf92d89 100644
--- a/youtube_dl/extractor/spiegeltv.py
+++ b/youtube_dl/extractor/spiegeltv.py
@@ -1,66 +1,75 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
-import re
-import json
-import urllib
 from .common import InfoExtractor
+from ..utils import float_or_none
+
 
 class SpiegeltvIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?spiegel\.tv/filme/(?P<id>[\-a-z0-9]+)'
-    _TEST = {
+    _VALID_URL = r'https?://(?:www\.)?spiegel\.tv/(?:#/)?filme/(?P<id>[\-a-z0-9]+)'
+    _TESTS = [{
         'url': 'http://www.spiegel.tv/filme/flug-mh370/',
-        'md5': '700d62dc485f3a81cf9d52144e5ead59',
         'info_dict': {
             'id': 'flug-mh370',
             'ext': 'm4v',
             'title': 'Flug MH370',
             'description': 'Das Rätsel um die Boeing 777 der Malaysia-Airlines',
+            'thumbnail': 're:http://.*\.jpg$',
+        },
+        'params': {
+            # rtmp download
+            'skip_download': True,
         }
-    }
+    }, {
+        'url': 'http://www.spiegel.tv/#/filme/alleskino-die-wahrheit-ueber-maenner/',
+        'only_matching': True,
+    }]
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-
+        if '/#/' in url:
+            url = url.replace('/#/', '/')
+        video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
         title = self._html_search_regex(r'(.*?)', webpage, 'title')
 
-        apihost = 'http://spiegeltv-ivms2-restapi.s3.amazonaws.com';
+        apihost = 'http://spiegeltv-ivms2-restapi.s3.amazonaws.com'
+        version_json = self._download_json(
+            '%s/version.json' % apihost, video_id,
+            note='Downloading version information')
+        version_name = version_json['version_name']
 
-        version_json_code = urllib.urlopen('%s/version.json' % apihost).read()
-        version_json = json.loads(version_json_code)
-        version_name = version_json['version_name']
+        slug_json = self._download_json(
+            '%s/%s/restapi/slugs/%s.json' % (apihost, version_name, video_id),
+            video_id,
+            note='Downloading object information')
+        oid = slug_json['object_id']
 
-        slug_json_code = urllib.urlopen('%s/%s/restapi/slugs/%s.json' % (apihost, version_name, video_id)).read()
-        slug_json = json.loads(slug_json_code)
-        oid = slug_json['object_id']
-
-        media_json_code = urllib.urlopen('%s/%s/restapi/media/%s.json' % (apihost, version_name, oid)).read()
-        media_json = json.loads(media_json_code)
+        media_json = self._download_json(
+            '%s/%s/restapi/media/%s.json' % (apihost, version_name, oid),
+            video_id, note='Downloading media information')
+        uuid = media_json['uuid']
+        is_wide = media_json['is_wide']
 
-        uuid = media_json['uuid']
-        is_wide = media_json['is_wide']
-
-        server_json_code = urllib.urlopen('http://www.spiegel.tv/streaming_servers/').read()
-        server_json = json.loads(server_json_code)
-        server = server_json[0]['endpoint']
+        server_json = self._download_json(
+            'http://www.spiegel.tv/streaming_servers/', video_id,
+            note='Downloading server information')
+        server = server_json[0]['endpoint']
 
         thumbnails = []
         for image in media_json['images']:
-            thumbnails.append({'url': image['url'], 'resolution': str(image['width']) + 'x' + str(image['height']) })
+            thumbnails.append({
+                'url': image['url'],
+                'width': image['width'],
+                'height': image['height'],
+            })
 
         description = media_json['subtitle']
-        duration = int(round(media_json['duration_in_ms'] / 1000))
-
-        if is_wide:
-            format = '16x9'
-        else:
-            format = '4x3'
+        duration = float_or_none(media_json.get('duration_in_ms'), scale=1000)
+        format = '16x9' if is_wide else '4x3'
 
         url = server + 'mp4:' + uuid + '_spiegeltv_0500_' + format + '.m4v'
 
-        return_dict = {
+        return {
             'id': video_id,
             'title': title,
             'url': url,
@@ -69,4 +78,3 @@ class SpiegeltvIE(InfoExtractor):
             'duration': duration,
             'thumbnails': thumbnails
         }
-        return return_dict