X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fclipfish.py;h=7af903571774beec4c44a57e768af95911ad7023;hb=33a513faf716e5d4a170da50d6fde541817dca09;hp=0d18e9a7a39e98a44bb934a3c5f12b00273b91f3;hpb=fc9e1cc69706ef079fca0ee32529503ecedae578;p=youtube-dl diff --git a/youtube_dl/extractor/clipfish.py b/youtube_dl/extractor/clipfish.py index 0d18e9a7a..7af903571 100644 --- a/youtube_dl/extractor/clipfish.py +++ b/youtube_dl/extractor/clipfish.py @@ -1,57 +1,68 @@ +from __future__ import unicode_literals + import re -import time -import xml.etree.ElementTree from .common import InfoExtractor -from ..utils import ExtractorError +from ..utils import ( + determine_ext, + int_or_none, + js_to_json, + parse_iso8601, + remove_end, +) class ClipfishIE(InfoExtractor): - IE_NAME = u'clipfish' - - _VALID_URL = r'^https?://(?:www\.)?clipfish\.de/.*?/video/(?P[0-9]+)/' + _VALID_URL = r'https?://(?:www\.)?clipfish\.de/(?:[^/]+/)+video/(?P[0-9]+)' _TEST = { - u'url': u'http://www.clipfish.de/special/game-trailer/video/3966754/fifa-14-e3-2013-trailer/', - u'file': u'3966754.mp4', - u'md5': u'2521cd644e862936cf2e698206e47385', - u'info_dict': { - u'title': u'FIFA 14 - E3 2013 Trailer', - u'duration': 82, + 'url': 'http://www.clipfish.de/special/game-trailer/video/3966754/fifa-14-e3-2013-trailer/', + 'md5': '79bc922f3e8a9097b3d68a93780fd475', + 'info_dict': { + 'id': '3966754', + 'ext': 'mp4', + 'title': 'FIFA 14 - E3 2013 Trailer', + 'timestamp': 1370938118, + 'upload_date': '20130611', + 'duration': 82, } } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group(1) - - info_url = ('http://www.clipfish.de/devxml/videoinfo/%s?ts=%d' % - (video_id, int(time.time()))) - doc = self._download_xml( - info_url, video_id, note=u'Downloading info page') - title = doc.find('title').text - video_url = doc.find('filename').text - if video_url is None: - xml_bytes = xml.etree.ElementTree.tostring(doc) - raise ExtractorError(u'Cannot find video URL in document %r' % - xml_bytes) - thumbnail = doc.find('imageurl').text - duration_str = doc.find('duration').text - m = re.match( - r'^(?P[0-9]+):(?P[0-9]{2}):(?P[0-9]{2}):(?P[0-9]*)$', - duration_str) - if m: - duration = ( - (int(m.group('hours')) * 60 * 60) + - (int(m.group('minutes')) * 60) + - (int(m.group('seconds'))) - ) - else: - duration = None + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + video_info = self._parse_json( + js_to_json(self._html_search_regex( + '(?s)videoObject\s*=\s*({.+?});', webpage, 'video object')), + video_id) + + formats = [] + for video_url in re.findall(r'var\s+videourl\s*=\s*"([^"]+)"', webpage): + ext = determine_ext(video_url) + if ext == 'm3u8': + formats.append({ + 'url': video_url.replace('de.hls.fra.clipfish.de', 'hls.fra.clipfish.de'), + 'ext': 'mp4', + 'format_id': 'hls', + }) + else: + formats.append({ + 'url': video_url, + 'format_id': ext, + }) + self._sort_formats(formats) + + title = remove_end(self._og_search_title(webpage), ' - Video') + thumbnail = self._og_search_thumbnail(webpage) + duration = int_or_none(video_info.get('length')) + timestamp = parse_iso8601(self._html_search_meta('uploadDate', webpage, 'upload date')) return { 'id': video_id, 'title': title, - 'url': video_url, + 'formats': formats, 'thumbnail': thumbnail, 'duration': duration, + 'timestamp': timestamp, }