X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Ftheplatform.py;h=29f938a76c6eec5786e979164a47cc621e739707;hb=f877c6ae5a6e252d6904f90d597479451d2107aa;hp=2d2178331ec396b937ee1115d4e22aee1844b8fb;hpb=aef8fdba1172d60983ba9685249c03b66e7a94f0;p=youtube-dl diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py index 2d2178331..29f938a76 100644 --- a/youtube_dl/extractor/theplatform.py +++ b/youtube_dl/extractor/theplatform.py @@ -9,23 +9,22 @@ import hashlib from .common import InfoExtractor -from ..compat import ( - compat_str, -) from ..utils import ( determine_ext, ExtractorError, xpath_with_ns, unsmuggle_url, + int_or_none, ) -_x = lambda p: xpath_with_ns(p, {'smil': 'http://www.w3.org/2005/SMIL21/Language'}) +default_ns = 'http://www.w3.org/2005/SMIL21/Language' +_x = lambda p: xpath_with_ns(p, {'smil': default_ns}) class ThePlatformIE(InfoExtractor): _VALID_URL = r'''(?x) (?:https?://(?:link|player)\.theplatform\.com/[sp]/(?P<provider_id>[^/]+)/ - (?P<config>(?:[^/\?]+/(?:swf|config)|onsite)/select/)? + (?:(?P<media>(?:[^/]+/)+select/media/)|(?P<config>(?:[^/\?]+/(?:swf|config)|onsite)/select/))? 
|theplatform:)(?P<id>[^/\?&]+)''' _TESTS = [{ @@ -42,8 +41,8 @@ class ThePlatformIE(InfoExtractor): # rtmp download 'skip_download': True, }, - # from http://www.cnet.com/videos/tesla-model-s-a-second-step-towards-a-cleaner-motoring-future/ }, { + # from http://www.cnet.com/videos/tesla-model-s-a-second-step-towards-a-cleaner-motoring-future/ 'url': 'http://link.theplatform.com/s/kYEXFC/22d_qsQ6MIRT', 'info_dict': { 'id': '22d_qsQ6MIRT', @@ -55,6 +54,17 @@ class ThePlatformIE(InfoExtractor): # rtmp download 'skip_download': True, } + }, { + 'url': 'https://player.theplatform.com/p/D6x-PC/pulse_preview/embed/select/media/yMBg9E8KFxZD', + 'info_dict': { + 'id': 'yMBg9E8KFxZD', + 'ext': 'mp4', + 'description': 'md5:644ad9188d655b742f942bf2e06b002d', + 'title': 'HIGHLIGHTS: USA bag first ever series Cup win', + } + }, { + 'url': 'http://player.theplatform.com/p/NnzsPC/widget/select/media/4Y0TlYUr_ZT7', + 'only_matching': True, }] @staticmethod @@ -84,6 +94,11 @@ class ThePlatformIE(InfoExtractor): if not provider_id: provider_id = 'dJ5BDC' + path = provider_id + if mobj.group('media'): + path += '/media' + path += '/' + video_id + if smuggled_data.get('force_smil_url', False): smil_url = url elif mobj.group('config'): @@ -91,10 +106,13 @@ class ThePlatformIE(InfoExtractor): config_url = config_url.replace('swf/', 'config/') config_url = config_url.replace('onsite/', 'onsite/config/') config = self._download_json(config_url, video_id, 'Downloading config') - smil_url = config['releaseUrl'] + '&format=SMIL&formats=MPEG4&manifest=f4m' + if 'releaseUrl' in config: + release_url = config['releaseUrl'] + else: + release_url = 'http://link.theplatform.com/s/%s?mbr=true' % path + smil_url = release_url + '&format=SMIL&formats=MPEG4&manifest=f4m' else: - smil_url = ('http://link.theplatform.com/s/{0}/{1}/meta.smil?' 
- 'format=smil&mbr=true'.format(provider_id, video_id)) + smil_url = 'http://link.theplatform.com/s/%s/meta.smil?format=smil&mbr=true' % path sig = smuggled_data.get('sig') if sig: @@ -111,7 +129,7 @@ class ThePlatformIE(InfoExtractor): else: raise ExtractorError(error_msg, expected=True) - info_url = 'http://link.theplatform.com/s/{0}/{1}?format=preview'.format(provider_id, video_id) + info_url = 'http://link.theplatform.com/s/%s?format=preview' % path info_json = self._download_webpage(info_url, video_id) info = json.loads(info_json) @@ -125,57 +143,19 @@ class ThePlatformIE(InfoExtractor): 'url': src, }] - head = meta.find(_x('smil:head')) - body = meta.find(_x('smil:body')) - - f4m_node = body.find(_x('smil:seq//smil:video')) - if f4m_node is not None and '.f4m' in f4m_node.attrib['src']: - f4m_url = f4m_node.attrib['src'] - if 'manifest.f4m?' not in f4m_url: - f4m_url += '?' + formats = self._parse_smil_formats( + meta, smil_url, video_id, namespace=default_ns, # the parameters are from syfy.com, other sites may use others, # they also work for nbc.com - f4m_url += '&g=UXWGVKRWHFSP&hdcore=3.0.3' - formats = self._extract_f4m_formats(f4m_url, video_id) - else: - formats = [] - switch = body.find(_x('smil:switch')) - if switch is None: - switch = body.find(_x('smil:par//smil:switch')) - if switch is None: - switch = body.find(_x('smil:par')) - if switch is not None: - base_url = head.find(_x('smil:meta')).attrib['base'] - for f in switch.findall(_x('smil:video')): - attr = f.attrib - width = int(attr['width']) - height = int(attr['height']) - vbr = int(attr['system-bitrate']) // 1000 - format_id = '%dx%d_%dk' % (width, height, vbr) - formats.append({ - 'format_id': format_id, - 'url': base_url, - 'play_path': 'mp4:' + attr['src'], - 'ext': 'flv', - 'width': width, - 'height': height, - 'vbr': vbr, - }) - else: - switch = body.find(_x('smil:seq//smil:switch')) - for f in switch.findall(_x('smil:video')): - attr = f.attrib - vbr = int(attr['system-bitrate']) // 
1000 - ext = determine_ext(attr['src']) - if ext == 'once': - ext = 'mp4' - formats.append({ - 'format_id': compat_str(vbr), - 'url': attr['src'], - 'vbr': vbr, - 'ext': ext, - }) - self._sort_formats(formats) + f4m_params={'g': 'UXWGVKRWHFSP', 'hdcore': '3.0.3'}, + transform_rtmp_url=lambda streamer, src: (streamer, 'mp4:' + src)) + + for _format in formats: + ext = determine_ext(_format['url']) + if ext == 'once': + _format['ext'] = 'mp4' + + self._sort_formats(formats) return { 'id': video_id, @@ -184,5 +164,5 @@ class ThePlatformIE(InfoExtractor): 'formats': formats, 'description': info['description'], 'thumbnail': info['defaultThumbnailUrl'], - 'duration': info['duration'] // 1000, + 'duration': int_or_none(info.get('duration'), 1000), }