X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fstreamcz.py;h=d3d2b7eb7a6fa9db4008365e62e046b83490b064;hb=5c2266df4b9aeb7881ed8c026a038e2a25e43734;hp=cb50aee3f54cda1fc395286fe50d9a93edb4a41d;hpb=0793a7b3c79349aefe8af7321f5ef36a034e6dc9;p=youtube-dl diff --git a/youtube_dl/extractor/streamcz.py b/youtube_dl/extractor/streamcz.py index cb50aee3f..d3d2b7eb7 100644 --- a/youtube_dl/extractor/streamcz.py +++ b/youtube_dl/extractor/streamcz.py @@ -1,14 +1,28 @@ # -*- coding: utf-8 -*- from __future__ import unicode_literals -import re -import json +import hashlib +import time from .common import InfoExtractor +from ..utils import ( + int_or_none, + sanitized_Request, +) + + +def _get_api_key(api_path): + if api_path.endswith('?'): + api_path = api_path[:-1] + + api_key = 'fb5f58a820353bd7095de526253c14fd' + a = '{0:}{1:}{2:}'.format(api_key, api_path, int(round(time.time() / 24 / 3600))) + return hashlib.md5(a.encode('ascii')).hexdigest() class StreamCZIE(InfoExtractor): - _VALID_URL = r'https?://www\.stream\.cz/((?P.+)/)?(?P.+)/(?P.+)' + _VALID_URL = r'https?://(?:www\.)?stream\.cz/.+/(?P[0-9]+)' + _API_URL = 'http://www.stream.cz/API' _TESTS = [{ 'url': 'http://www.stream.cz/peklonataliri/765767-ecka-pro-deti', @@ -17,48 +31,66 @@ class StreamCZIE(InfoExtractor): 'id': '765767', 'ext': 'mp4', 'title': 'Peklo na talíři: Éčka pro děti', - 'description': 'md5:49ace0df986e95e331d0fe239d421519', - 'thumbnail': 'http://im.stream.cz/episode/52961d7e19d423f8f06f0100', + 'description': 'Taška s grónskou pomazánkou a další pekelnosti ZDE', + 'thumbnail': 're:^http://im.stream.cz/episode/52961d7e19d423f8f06f0100', + 'duration': 256, + }, + }, { + 'url': 'http://www.stream.cz/blanik/10002447-tri-roky-pro-mazanka', + 'md5': 'e54a254fb8b871968fd8403255f28589', + 'info_dict': { + 'id': '10002447', + 'ext': 'mp4', + 'title': 'Kancelář Blaník: Tři roky pro Mazánka', + 'description': 'md5:3862a00ba7bf0b3e44806b544032c859', + 'thumbnail': 're:^http://im.stream.cz/episode/537f838c50c11f8d21320000', + 'duration': 368, }, - }, - ] + }] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('videoid') + video_id = self._match_id(url) + api_path = '/episode/%s' % video_id - webpage = self._download_webpage(url, video_id) - - data = self._html_search_regex(r'Stream\.Data\.Episode\((.+?)\);', webpage, 'stream data') - - jsonData = json.loads(data) + req = sanitized_Request(self._API_URL + api_path) + req.add_header('Api-Password', _get_api_key(api_path)) + data = self._download_json(req, video_id) formats = [] - for video in jsonData['instances']: - format_id = video['instances'][0]['quality'] - - if format_id == '240p': - quality = 0 - elif format_id == '360p': - quality = 1 - elif format_id == '480p': - quality = 2 - elif format_id == '720p': - quality = 3 + for quality, video in enumerate(data['video_qualities']): + for f in video['formats']: + typ = f['type'].partition('/')[2] + qlabel = video.get('quality_label') + formats.append({ + 'format_note': '%s-%s' % (qlabel, typ) if qlabel else typ, + 'format_id': '%s-%s' % (typ, f['quality']), + 'url': f['source'], + 'height': int_or_none(f['quality'].rstrip('p')), + 'quality': quality, + }) + self._sort_formats(formats) - formats.append({ - 'format_id': format_id, - 'url': video['instances'][0]['source'], - 'quality': quality, - 'ext': 'mp4', - }) + image = data.get('image') + if image: + thumbnail = self._proto_relative_url( + image.replace('{width}', '1240').replace('{height}', '697'), + scheme='http:', + ) + else: + thumbnail = None - self._sort_formats(formats) + stream = data.get('_embedded', {}).get('stream:show', {}).get('name') + if stream: + title = '%s: %s' % (stream, data['name']) + else: + title = data['name'] return { - 'id': str(jsonData['id']), - 'title': self._og_search_title(webpage), - 'thumbnail': jsonData['episode_image_original_url'].replace('//', 'http://'), + 'id': video_id, + 'title': title, + 'thumbnail': thumbnail, 'formats': formats, - 'description': self._og_search_description(webpage), + 'description': data.get('web_site_text'), + 'duration': int_or_none(data.get('duration')), + 'view_count': int_or_none(data.get('views')), }