X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fwalla.py;h=cbb54867244839e0447324f5a57e07cef2f6c646;hb=HEAD;hp=e687c3af0a3d0e7df5f8b1f06044729631309839;hpb=e4d6cca0c1bb987592b576bd3bd439e6ecc9b342;p=youtube-dl diff --git a/youtube_dl/extractor/walla.py b/youtube_dl/extractor/walla.py index e687c3af0..cbb548672 100644 --- a/youtube_dl/extractor/walla.py +++ b/youtube_dl/extractor/walla.py @@ -1,70 +1,86 @@ # coding: utf-8 from __future__ import unicode_literals - import re from .common import InfoExtractor +from ..utils import ( + xpath_text, + int_or_none, +) class WallaIE(InfoExtractor): - _VALID_URL = r'http://vod\.walla\.co\.il/\w+/(?P\d+)' + _VALID_URL = r'https?://vod\.walla\.co\.il/[^/]+/(?P\d+)/(?P.+)' _TEST = { 'url': 'http://vod.walla.co.il/movie/2642630/one-direction-all-for-one', 'info_dict': { 'id': '2642630', + 'display_id': 'one-direction-all-for-one', 'ext': 'flv', 'title': 'וואן דיירקשן: ההיסטריה', + 'description': 'md5:de9e2512a92442574cdb0913c49bc4d8', + 'thumbnail': r're:^https?://.*\.jpg', + 'duration': 3600, + }, + 'params': { + # rtmp download + 'skip_download': True, } } + _SUBTITLE_LANGS = { + 'עברית': 'heb', + } + def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + display_id = mobj.group('display_id') - config_url = 'http://video2.walla.co.il/?w=null/null/%s/@@/video/flv_pl' % video_id - - webpage = self._download_webpage(config_url, video_id, '') - - media_id = self._html_search_regex(r'(\d+)', webpage, video_id, 'extract media id') - - prefix = '0' if len(media_id) == 7 else '' - - series = '%s%s' % (prefix, media_id[0:2]) - session = media_id[2:5] - episode = media_id[5:7] - - title = self._html_search_regex(r'(.*)', webpage, video_id, 'title') - - default_quality = self._html_search_regex(r'', webpage, video_id, 0) + video = self._download_xml( + 'http://video2.walla.co.il/?w=null/null/%s/@@/video/flv_pl' % video_id, + display_id) - quality = default_quality if default_quality else '40' + item = video.find('./items/item') - media_path = '/%s/%s/%s' % (series, session, media_id) #self._html_search_regex(r'.*(.*)' % default_quality ,webpage, '', flags=re.DOTALL) - - playpath = 'mp4:media/%s/%s/%s-%s' % (series, session, media_id, quality) #self._html_search_regex(r'.*(.*)' % default_quality ,webpage, '', flags=re.DOTALL) + title = xpath_text(item, './title', 'title') + description = xpath_text(item, './synopsis', 'description') + thumbnail = xpath_text(item, './preview_pic', 'thumbnail') + duration = int_or_none(xpath_text(item, './duration', 'duration')) subtitles = {} - - subtitle_url = self._html_search_regex(r'(.*).*', webpage, video_id, 0) - - print subtitle_url - - if subtitle_url: - subtitles_page = self._download_webpage(subtitle_url, video_id, '') - subtitles['heb'] = subtitles_page + for subtitle in item.findall('./subtitles/subtitle'): + lang = xpath_text(subtitle, './title') + subtitles[self._SUBTITLE_LANGS.get(lang, lang)] = [{ + 'ext': 'srt', + 'url': xpath_text(subtitle, './src'), + }] + + formats = [] + for quality in item.findall('./qualities/quality'): + format_id = xpath_text(quality, './title') + fmt = { + 'url': 'rtmp://wafla.walla.co.il/vod', + 'play_path': xpath_text(quality, './src'), + 'player_url': 'http://isc.walla.co.il/w9/swf/video_swf/vod/WallaMediaPlayerAvod.swf', + 'page_url': url, + 'ext': 'flv', + 'format_id': xpath_text(quality, './title'), + } + m = re.search(r'^(?P\d+)[Pp]', format_id) + if m: + fmt['height'] = int(m.group('height')) + formats.append(fmt) + self._sort_formats(formats) return { 'id': video_id, + 'display_id': display_id, 'title': title, - 'url': 'rtmp://wafla.walla.co.il:1935/vod', - 'player_url': 'http://isc.walla.co.il/w9/swf/video_swf/vod/WallaMediaPlayerAvod.swf', - 'page_url': url, - 'app': "vod", - 'play_path': playpath, - 'tc_url': 'rtmp://wafla.walla.co.il:1935/vod', - 'rtmp_protocol': 'rtmp', - 'ext': 'flv', + 'description': description, + 'thumbnail': thumbnail, + 'duration': duration, + 'formats': formats, 'subtitles': subtitles, - } \ No newline at end of file + }