X-Git-Url: http://git.bitcoin.ninja/index.cgi?p=youtube-dl;a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fhelsinki.py;h=575fb332a055465446fc5db9448313ec793d3258;hp=2a54f3cca89d063f321e512554225778c772caff;hb=dcdb292fddc82ae11f4c0b647815a45c88a6b6d5;hpb=66c43a53e4b1b4d4e530ae4dcded2d382d51b264 diff --git a/youtube_dl/extractor/helsinki.py b/youtube_dl/extractor/helsinki.py index 2a54f3cca..575fb332a 100644 --- a/youtube_dl/extractor/helsinki.py +++ b/youtube_dl/extractor/helsinki.py @@ -1,51 +1,43 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import unicode_literals -import re - from .common import InfoExtractor +from ..utils import js_to_json class HelsinkiIE(InfoExtractor): + IE_DESC = 'helsinki.fi' _VALID_URL = r'https?://video\.helsinki\.fi/Arkisto/flash\.php\?id=(?P<id>\d+)' _TEST = { 'url': 'http://video.helsinki.fi/Arkisto/flash.php?id=20258', - 'md5': 'cd829201b890905682eb194cbdea55d7', 'info_dict': { 'id': '20258', 'ext': 'mp4', 'title': 'Tietotekniikkafoorumi-iltapäivä', + 'description': 'md5:f5c904224d43c133225130fe156a5ee0', + }, + 'params': { + 'skip_download': True, # RTMP } } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - vid = mobj.group('id') - webpage = self._download_webpage(url, vid) - formats = [] - mobj = re.search('file=((\w+):[^&]+)', webpage) - if mobj: formats.append({ - 'ext': mobj.group(2), - 'play_path': mobj.group(1), - 'url': 'rtmp://flashvideo.it.helsinki.fi/vod/', - 'player_url': 'http://video.helsinki.fi/player.swf', - 'format_note': 'sd' - }) - - mobj = re.search('hd\.file=((\w+):[^&]+)', webpage) - if mobj: formats.append({ - 'ext': mobj.group(2), - 'play_path': mobj.group(1), - 'url': 'rtmp://flashvideo.it.helsinki.fi/vod/', - 'player_url': 'http://video.helsinki.fi/player.swf', - 'format_note': 'hd' - }) + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + params = self._parse_json(self._html_search_regex( + r'(?s)jwplayer\("player"\).setup\((\{.*?\})\);', 
webpage, 'player code'), video_id, transform_source=js_to_json) + formats = [{ + 'url': s['file'], + 'ext': 'mp4', + } for s in params['sources']] + self._sort_formats(formats) return { - 'id': vid, + 'id': video_id, 'title': self._og_search_title(webpage).replace('Video: ', ''), 'description': self._og_search_description(webpage), - 'thumbnail': self._og_search_thumbnail(webpage), - 'formats': formats + 'formats': formats, }