X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fooyala.py;h=eb957b8fefc9dd4fbbbad0279b4d87687a501b07;hb=HEAD;hp=3b692e9036a4faa522c8da799aec3abe6ea1bf1c;hpb=58c8451f3675bf5a17685fefae203c41cebb18cc;p=youtube-dl diff --git a/youtube_dl/extractor/ooyala.py b/youtube_dl/extractor/ooyala.py index 3b692e903..eb957b8fe 100644 --- a/youtube_dl/extractor/ooyala.py +++ b/youtube_dl/extractor/ooyala.py @@ -1,69 +1,112 @@ from __future__ import unicode_literals -import re + import base64 +import re from .common import InfoExtractor +from ..compat import ( + compat_b64decode, + compat_str, +) from ..utils import ( - int_or_none, - float_or_none, + determine_ext, ExtractorError, + float_or_none, + int_or_none, + try_get, unsmuggle_url, ) -from ..compat import compat_urllib_parse class OoyalaBaseIE(InfoExtractor): + _PLAYER_BASE = 'http://player.ooyala.com/' + _CONTENT_TREE_BASE = _PLAYER_BASE + 'player_api/v1/content_tree/' + _AUTHORIZATION_URL_TEMPLATE = _PLAYER_BASE + 'sas/player_api/v2/authorization/embed_code/%s/%s' - def _extract(self, content_tree_url, video_id, domain='example.org'): + def _extract(self, content_tree_url, video_id, domain=None, supportedformats=None, embed_token=None): content_tree = self._download_json(content_tree_url, video_id)['content_tree'] metadata = content_tree[list(content_tree)[0]] embed_code = metadata['embed_code'] pcode = metadata.get('asset_pcode') or embed_code - video_info = { - 'id': embed_code, - 'title': metadata['title'], - 'description': metadata.get('description'), - 'thumbnail': metadata.get('thumbnail_image') or metadata.get('promo_image'), - 'duration': int_or_none(metadata.get('duration')), - } + title = metadata['title'] + + auth_data = self._download_json( + self._AUTHORIZATION_URL_TEMPLATE % (pcode, embed_code), + video_id, headers=self.geo_verification_headers(), query={ + 'domain': domain or 'player.ooyala.com', + 'supportedFormats': supportedformats or 'mp4,rtmp,m3u8,hds,dash,smooth', + 'embedToken': embed_token, + })['authorization_data'][embed_code] + urls = [] formats = [] - for supported_format in ('mp4', 'm3u8', 'hds', 'rtmp'): - auth_data = self._download_json( - 'http://player.ooyala.com/sas/player_api/v1/authorization/embed_code/%s/%s?' % (pcode, embed_code) + compat_urllib_parse.urlencode({'domain': domain, 'supportedFormats': supported_format}), - video_id, 'Downloading %s JSON' % supported_format) - - cur_auth_data = auth_data['authorization_data'][embed_code] - - if cur_auth_data['authorized']: - for stream in cur_auth_data['streams']: - url = base64.b64decode(stream['url']['data'].encode('ascii')).decode('utf-8') - delivery_type = stream['delivery_type'] - if delivery_type == 'remote_asset': - video_info['url'] = url - return video_info - if delivery_type == 'hls': - formats.extend(self._extract_m3u8_formats(url, embed_code, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) - elif delivery_type == 'hds': - formats.extend(self._extract_f4m_formats(url, embed_code, -1, 'hds', fatal=False)) - else: - formats.append({ - 'url': url, - 'ext': stream.get('delivery_type'), - 'vcodec': stream.get('video_codec'), - 'format_id': '%s-%s-%sp' % (stream.get('profile'), delivery_type, stream.get('height')), - 'width': int_or_none(stream.get('width')), - 'height': int_or_none(stream.get('height')), - 'abr': int_or_none(stream.get('audio_bitrate')), - 'vbr': int_or_none(stream.get('video_bitrate')), - 'fps': float_or_none(stream.get('framerate')), - }) + streams = auth_data.get('streams') or [{ + 'delivery_type': 'hls', + 'url': { + 'data': base64.b64encode(('http://player.ooyala.com/hls/player/all/%s.m3u8' % embed_code).encode()).decode(), + } + }] + for stream in streams: + url_data = try_get(stream, lambda x: x['url']['data'], compat_str) + if not url_data: + continue + s_url = compat_b64decode(url_data).decode('utf-8') + if not s_url or s_url in urls: + continue + urls.append(s_url) + ext = determine_ext(s_url, None) + delivery_type = stream.get('delivery_type') + if delivery_type == 'hls' or ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + re.sub(r'/ip(?:ad|hone)/', '/all/', s_url), embed_code, 'mp4', 'm3u8_native', + m3u8_id='hls', fatal=False)) + elif delivery_type == 'hds' or ext == 'f4m': + formats.extend(self._extract_f4m_formats( + s_url + '?hdcore=3.7.0', embed_code, f4m_id='hds', fatal=False)) + elif delivery_type == 'dash' or ext == 'mpd': + formats.extend(self._extract_mpd_formats( + s_url, embed_code, mpd_id='dash', fatal=False)) + elif delivery_type == 'smooth': + self._extract_ism_formats( + s_url, embed_code, ism_id='mss', fatal=False) + elif ext == 'smil': + formats.extend(self._extract_smil_formats( + s_url, embed_code, fatal=False)) else: - raise ExtractorError('%s said: %s' % (self.IE_NAME, cur_auth_data['message']), expected=True) + formats.append({ + 'url': s_url, + 'ext': ext or delivery_type, + 'vcodec': stream.get('video_codec'), + 'format_id': delivery_type, + 'width': int_or_none(stream.get('width')), + 'height': int_or_none(stream.get('height')), + 'abr': int_or_none(stream.get('audio_bitrate')), + 'vbr': int_or_none(stream.get('video_bitrate')), + 'fps': float_or_none(stream.get('framerate')), + }) + if not formats and not auth_data.get('authorized'): + raise ExtractorError('%s said: %s' % ( + self.IE_NAME, auth_data['message']), expected=True) self._sort_formats(formats) - video_info['formats'] = formats - return video_info + subtitles = {} + for lang, sub in metadata.get('closed_captions_vtt', {}).get('captions', {}).items(): + sub_url = sub.get('url') + if not sub_url: + continue + subtitles[lang] = [{ + 'url': sub_url, + }] + + return { + 'id': embed_code, + 'title': title, + 'description': metadata.get('description'), + 'thumbnail': metadata.get('thumbnail_image') or metadata.get('promo_image'), + 'duration': float_or_none(metadata.get('duration'), 1000), + 'subtitles': subtitles, + 'formats': formats, + } class OoyalaIE(OoyalaBaseIE): @@ -78,8 +121,10 @@ class OoyalaIE(OoyalaBaseIE): 'ext': 'mp4', 'title': 'Explaining Data Recovery from Hard Drives and SSDs', 'description': 'How badly damaged does a drive have to be to defeat Russell and his crew? Apparently, smashed to bits.', - 'duration': 853386, + 'duration': 853.386, }, + # The video in the original webpage now uses PlayWire + 'skip': 'Ooyala said: movie expired', }, { # Only available for ipad 'url': 'http://player.ooyala.com/player.js?embedCode=x1b3lqZDq9y_7kMyC2Op5qo-p077tXD0', @@ -87,7 +132,7 @@ class OoyalaIE(OoyalaBaseIE): 'id': 'x1b3lqZDq9y_7kMyC2Op5qo-p077tXD0', 'ext': 'mp4', 'title': 'Simulation Overview - Levels of Simulation', - 'duration': 194948, + 'duration': 194.948, }, }, { @@ -99,8 +144,13 @@ class OoyalaIE(OoyalaBaseIE): 'id': 'FiOG81ZTrvckcchQxmalf4aQj590qTEx', 'ext': 'mp4', 'title': 'Divide Tool Path.mp4', - 'duration': 204405, + 'duration': 204.405, } + }, + { + # empty stream['url']['data'] + 'url': 'http://player.ooyala.com/player.js?embedCode=w2bnZtYjE6axZ_dw1Cd0hQtXd_ige2Is', + 'only_matching': True, } ] @@ -117,8 +167,10 @@ class OoyalaIE(OoyalaBaseIE): url, smuggled_data = unsmuggle_url(url, {}) embed_code = self._match_id(url) domain = smuggled_data.get('domain') - content_tree_url = 'http://player.ooyala.com/player_api/v1/content_tree/embed_code/%s/%s' % (embed_code, embed_code) - return self._extract(content_tree_url, embed_code, domain) + supportedformats = smuggled_data.get('supportedformats') + embed_token = smuggled_data.get('embed_token') + content_tree_url = self._CONTENT_TREE_BASE + 'embed_code/%s/%s' % (embed_code, embed_code) + return self._extract(content_tree_url, embed_code, domain, supportedformats, embed_token) class OoyalaExternalIE(OoyalaBaseIE): @@ -144,7 +196,7 @@ class OoyalaExternalIE(OoyalaBaseIE): 'id': 'FkYWtmazr6Ed8xmvILvKLWjd4QvYZpzG', 'ext': 'mp4', 'title': 'dm_140128_30for30Shorts___JudgingJewellv2', - 'duration': 1302000, + 'duration': 1302.0, }, 'params': { # m3u8 download @@ -154,5 +206,5 @@ class OoyalaExternalIE(OoyalaBaseIE): def _real_extract(self, url): partner_id, video_id, pcode = re.match(self._VALID_URL, url).groups() - content_tree_url = 'http://player.ooyala.com/player_api/v1/content_tree/external_id/%s/%s:%s' % (pcode, partner_id, video_id) + content_tree_url = self._CONTENT_TREE_BASE + 'external_id/%s/%s:%s' % (pcode, partner_id, video_id) return self._extract(content_tree_url, video_id)