# coding: utf-8
from __future__ import unicode_literals

from .common import InfoExtractor


class KaraoketvIE(InfoExtractor):
    """Extractor for song pages on karaoketv.co.il.

    The video is served over RTMP; the stream location is resolved by
    following two nested embeds (the karaoke.co.il "api_play" page and the
    video-cdn.com player page) starting from the song page.
    """

    # The numeric path segment after the first path component is the video id.
    _VALID_URL = r'https?://(?:www\.)?karaoketv\.co\.il/[^/]+/(?P<id>\d+)'
    _TEST = {
        'url': 'http://www.karaoketv.co.il/%D7%A9%D7%99%D7%A8%D7%99_%D7%A7%D7%A8%D7%99%D7%95%D7%A7%D7%99/58356/%D7%90%D7%99%D7%96%D7%95%D7%9F',
        'info_dict': {
            'id': '58356',
            'ext': 'flv',
            'title': 'קריוקי של איזון',
        },
        'params': {
            # rtmp download
            'skip_download': True,
        }
    }

    def _real_extract(self, url):
        """Resolve a song page URL into an info dict with RTMP formats.

        Steps:
          1. Download the song page and locate the embedded
             ``karaoke.co.il/api_play.php`` URL.
          2. Download that page and locate the embedded
             ``video-cdn.com/embed/iframe/`` URL.
          3. Download the player page; read the RTMP play path from the
             ``options`` JavaScript object and the server list from the
             optional ``settings`` object.

        Returns a dict with ``id``, ``title`` (taken from the song page's
        Open Graph title) and ``formats`` (one RTMP entry per server).
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)
        # NOTE(review): the leading ``<iframe[^>`` of the two embed patterns
        # below was reconstructed (the tag text had been stripped from the
        # available copy) -- confirm the tag name against upstream.
        api_page_url = self._search_regex(
            r'<iframe[^>]+src=(["\'])(?P<url>https?://www\.karaoke\.co\.il/api_play\.php\?.+?)\1',
            webpage, 'API play URL', group='url')

        api_page = self._download_webpage(api_page_url, video_id)
        video_cdn_url = self._search_regex(
            r'<iframe[^>]+src=(["\'])(?P<url>https?://www\.video-cdn\.com/embed/iframe/.+?)\1',
            api_page, 'video cdn URL', group='url')

        video_cdn = self._download_webpage(video_cdn_url, video_id)
        # ``options`` is mandatory: it carries the clip's RTMP play path.
        play_path = self._parse_json(
            self._search_regex(
                r'var\s+options\s*=\s*({.+?});', video_cdn, 'options'),
            video_id)['clip']['url']

        # ``settings`` is optional: a missing or unparsable object degrades
        # to an empty dict so the hard-coded server below is used instead.
        settings = self._parse_json(
            self._search_regex(
                r'var\s+settings\s*=\s*({.+?});', video_cdn, 'servers', default='{}'),
            video_id, fatal=False) or {}

        servers = settings.get('servers')
        if not servers or not isinstance(servers, list):
            # Fallback used when the page does not supply a usable list.
            servers = ('wowzail.video-cdn.com:80/vodcdn', )

        formats = [{
            # Server entries may come with or without the rtmp scheme.
            'url': 'rtmp://%s' % server if not server.startswith('rtmp') else server,
            'play_path': play_path,
            'app': 'vodcdn',
            'page_url': video_cdn_url,
            'player_url': 'http://www.video-cdn.com/assets/flowplayer/flowplayer.commercial-3.2.18.swf',
            'rtmp_real_time': True,
            'ext': 'flv',
        } for server in servers]

        return {
            'id': video_id,
            'title': self._og_search_title(webpage),
            'formats': formats,
        }