X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;ds=inline;f=youtube_dl%2Fextractor%2Fkaltura.py;h=ddf1165ffb021005119622d3f162cbce9c637b55;hb=14704aeff6eeee4357e3a26f83432ff908db64fc;hp=147bb8cf08f836c70f4dc63ed83569010df1c4f7;hpb=81953d1ae53bc5b3344243480316d751004a4d40;p=youtube-dl diff --git a/youtube_dl/extractor/kaltura.py b/youtube_dl/extractor/kaltura.py index 147bb8cf0..ddf1165ff 100644 --- a/youtube_dl/extractor/kaltura.py +++ b/youtube_dl/extractor/kaltura.py @@ -62,6 +62,11 @@ class KalturaIE(InfoExtractor): { 'url': 'https://cdnapisec.kaltura.com/html5/html5lib/v2.30.2/mwEmbedFrame.php/p/1337/uiconf_id/20540612/entry_id/1_sf5ovm7u?wid=_243342', 'only_matching': True, + }, + { + # video with subtitles + 'url': 'kaltura:111032:1_cw786r8q', + 'only_matching': True, } ] @@ -91,14 +96,15 @@ class KalturaIE(InfoExtractor): if mobj: embed_info = mobj.groupdict() url = 'kaltura:%(partner_id)s:%(id)s' % embed_info + escaped_pid = re.escape(embed_info['partner_id']) service_url = re.search( - ']+src=(?:["\'])((?:https?:)?//.+?)/p/%(partner_id)s/sp/%(partner_id)s00/embedIframeJs' % embed_info, + r']+src=["\']((?:https?:)?//.+?)/p/%s/sp/%s00/embedIframeJs' % (escaped_pid, escaped_pid), webpage) if service_url: url = smuggle_url(url, {'service_url': service_url.group(1)}) return url - def _kaltura_api_call(self, video_id, actions, *args, **kwargs): + def _kaltura_api_call(self, video_id, actions, service_url=None, *args, **kwargs): params = actions[0] if len(actions) > 1: for i, a in enumerate(actions[1:], start=1): @@ -106,7 +112,7 @@ class KalturaIE(InfoExtractor): params['%d:%s' % (i, k)] = v data = self._download_json( - self._SERVICE_URL + self._SERVICE_BASE, + (service_url or self._SERVICE_URL) + self._SERVICE_BASE, video_id, query=params, *args, **kwargs) status = data if len(actions) == 1 else data[0] @@ -116,7 +122,7 @@ class KalturaIE(InfoExtractor): return data - def _get_kaltura_signature(self, video_id, partner_id): + def _get_kaltura_signature(self, video_id, partner_id, service_url=None): actions = [{ 'apiVersion': '3.1', 'expiry': 86400, @@ -126,10 +132,9 @@ class KalturaIE(InfoExtractor): 'widgetId': '_%s' % partner_id, }] return self._kaltura_api_call( - video_id, actions, note='Downloading Kaltura signature')['ks'] + video_id, actions, service_url, note='Downloading Kaltura signature')['ks'] - def _get_video_info(self, video_id, partner_id): - signature = self._get_kaltura_signature(video_id, partner_id) + def _get_video_info(self, video_id, partner_id, service_url=None): actions = [ { 'action': 'null', @@ -137,34 +142,44 @@ class KalturaIE(InfoExtractor): 'clientTag': 'kdp:v3.8.5', 'format': 1, # JSON, 2 = XML, 3 = PHP 'service': 'multirequest', - 'ks': signature, + }, + { + 'expiry': 86400, + 'service': 'session', + 'action': 'startWidgetSession', + 'widgetId': '_%s' % partner_id, }, { 'action': 'get', 'entryId': video_id, 'service': 'baseentry', - 'version': '-1', + 'ks': '{1:result:ks}', }, { 'action': 'getbyentryid', 'entryId': video_id, 'service': 'flavorAsset', + 'ks': '{1:result:ks}', + }, + { + 'action': 'list', + 'filter:entryIdEqual': video_id, + 'service': 'caption_captionasset', + 'ks': '{1:result:ks}', }, ] return self._kaltura_api_call( - video_id, actions, note='Downloading video info JSON') + video_id, actions, service_url, note='Downloading video info JSON') def _real_extract(self, url): url, smuggled_data = unsmuggle_url(url, {}) - service_url = smuggled_data.get('service_url') - if service_url: - self._SERVICE_URL = service_url mobj = re.match(self._VALID_URL, url) partner_id, entry_id = mobj.group('partner_id', 'id') ks = None + captions = None if partner_id and entry_id: - info, flavor_assets = self._get_video_info(entry_id, partner_id) + _, info, flavor_assets, captions = self._get_video_info(entry_id, partner_id, smuggled_data.get('service_url')) else: path, query = mobj.group('path', 'query') if not path and not query: @@ -183,7 +198,7 @@ class KalturaIE(InfoExtractor): raise ExtractorError('Invalid URL', expected=True) if 'entry_id' in params: entry_id = params['entry_id'][0] - info, flavor_assets = self._get_video_info(entry_id, partner_id) + _, info, flavor_assets, captions = self._get_video_info(entry_id, partner_id) elif 'uiconf_id' in params and 'flashvars[referenceId]' in params: reference_id = params['flashvars[referenceId]'][0] webpage = self._download_webpage(url, reference_id) @@ -219,7 +234,7 @@ class KalturaIE(InfoExtractor): formats = [] for f in flavor_assets: # Continue if asset is not ready - if f['status'] != 2: + if f.get('status') != 2: continue video_url = sign_url( '%s/flavorId/%s' % (data_url, f['id'])) @@ -242,13 +257,24 @@ class KalturaIE(InfoExtractor): m3u8_url, entry_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) - self._check_formats(formats, entry_id) self._sort_formats(formats) + subtitles = {} + if captions: + for caption in captions.get('objects', []): + # Continue if caption is not ready + if f.get('status') != 2: + continue + subtitles.setdefault(caption.get('languageCode') or caption.get('language'), []).append({ + 'url': '%s/api_v3/service/caption_captionasset/action/serve/captionAssetId/%s' % (self._SERVICE_URL, caption['id']), + 'ext': caption.get('fileExt'), + }) + return { 'id': entry_id, 'title': info['name'], 'formats': formats, + 'subtitles': subtitles, 'description': clean_html(info.get('description')), 'thumbnail': info.get('thumbnailUrl'), 'duration': info.get('duration'),