X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Ftheplatform.py;h=863914299234fb0d7dd279da51c0f0d8c18d6534;hb=6e6bc8dae577c29c072ffc5c25078b5668435435;hp=0bf6726b53641734fd0fcafb73a76d8c3621b302;hpb=2c28da8e05f018d021d52dd1e8bc44bfbb6dbf00;p=youtube-dl diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py index 0bf6726b5..863914299 100644 --- a/youtube_dl/extractor/theplatform.py +++ b/youtube_dl/extractor/theplatform.py @@ -8,50 +8,47 @@ import binascii import hashlib -from .common import InfoExtractor +from .once import OnceIE from ..compat import ( compat_parse_qs, compat_urllib_parse_urlparse, ) from ..utils import ( - determine_ext, ExtractorError, float_or_none, int_or_none, sanitized_Request, unsmuggle_url, - url_basename, xpath_with_ns, + mimetype2ext, + find_xpath_attr, ) default_ns = 'http://www.w3.org/2005/SMIL21/Language' _x = lambda p: xpath_with_ns(p, {'smil': default_ns}) -class ThePlatformBaseIE(InfoExtractor): +class ThePlatformBaseIE(OnceIE): def _extract_theplatform_smil(self, smil_url, video_id, note='Downloading SMIL data'): - meta = self._download_xml(smil_url, video_id, note=note) - try: - error_msg = next( - n.attrib['abstract'] - for n in meta.findall(_x('.//smil:ref')) - if n.attrib.get('title') == 'Geographic Restriction' or n.attrib.get('title') == 'Expired') - except StopIteration: - pass - else: - raise ExtractorError(error_msg, expected=True) + meta = self._download_xml(smil_url, video_id, note=note, query={'format': 'SMIL'}) + error_element = find_xpath_attr(meta, _x('.//smil:ref'), 'src') + if error_element is not None and error_element.attrib['src'].startswith( + 'http://link.theplatform.com/s/errorFiles/Unavailable.'): + raise ExtractorError(error_element.attrib['abstract'], expected=True) - formats = self._parse_smil_formats( + smil_formats = self._parse_smil_formats( meta, smil_url, video_id, namespace=default_ns, # the parameters are from syfy.com, other sites may use others, # they also work for nbc.com f4m_params={'g': 'UXWGVKRWHFSP', 'hdcore': '3.0.3'}, transform_rtmp_url=lambda streamer, src: (streamer, 'mp4:' + src)) - for _format in formats: - ext = determine_ext(_format['url']) - if ext == 'once': - _format['ext'] = 'mp4' + formats = [] + for _format in smil_formats: + if OnceIE.suitable(_format['url']): + formats.extend(self._extract_once_formats(_format['url'])) + else: + formats.append(_format) self._sort_formats(formats) @@ -69,7 +66,7 @@ class ThePlatformBaseIE(InfoExtractor): for caption in captions: lang, src, mime = caption.get('lang', 'en'), caption.get('src'), caption.get('type') subtitles[lang] = [{ - 'ext': 'srt' if mime == 'text/srt' else 'ttml', + 'ext': mimetype2ext(mime), 'url': src, }] @@ -85,7 +82,7 @@ class ThePlatformBaseIE(InfoExtractor): class ThePlatformIE(ThePlatformBaseIE): _VALID_URL = r'''(?x) (?:https?://(?:link|player)\.theplatform\.com/[sp]/(?P[^/]+)/ - (?:(?P(?:[^/]+/)+select/media/)|(?P(?:[^/\?]+/(?:swf|config)|onsite)/select/))? + (?:(?P(?:(?:[^/]+/)+select/)?media/)|(?P(?:[^/\?]+/(?:swf|config)|onsite)/select/))? |theplatform:)(?P[^/\?&]+)''' _TESTS = [{ @@ -128,7 +125,7 @@ class ThePlatformIE(ThePlatformBaseIE): 'only_matching': True, }, { 'url': 'http://player.theplatform.com/p/2E2eJC/nbcNewsOffsite?guid=tdy_or_siri_150701', - 'md5': '734f3790fb5fc4903da391beeebc4836', + 'md5': 'fb96bb3d85118930a5b055783a3bd992', 'info_dict': { 'id': 'tdy_or_siri_150701', 'ext': 'mp4', @@ -138,7 +135,6 @@ class ThePlatformIE(ThePlatformBaseIE): 'thumbnail': 're:^https?://.*\.jpg$', 'timestamp': 1435752600, 'upload_date': '20150701', - 'categories': ['Today/Shows/Orange Room', 'Today/Sections/Money', 'Today/Topics/Tech', "Today/Topics/Editor's picks"], }, }, { # From http://www.nbc.com/the-blacklist/video/sir-crispin-crandall/2928790?onid=137781#vc137781=1 @@ -216,7 +212,7 @@ class ThePlatformIE(ThePlatformBaseIE): webpage, 'smil url', group='url') path = self._search_regex( r'link\.theplatform\.com/s/((?:[^/?#&]+/)+[^/?#&]+)', smil_url, 'path') - smil_url += '?' if '?' not in smil_url else '&' + 'formats=m3u,mpeg4&format=SMIL' + smil_url += '?' if '?' not in smil_url else '&' + 'formats=m3u,mpeg4' elif mobj.group('config'): config_url = url + '&form=json' config_url = config_url.replace('swf/', 'config/') @@ -226,9 +222,9 @@ class ThePlatformIE(ThePlatformBaseIE): release_url = config['releaseUrl'] else: release_url = 'http://link.theplatform.com/s/%s?mbr=true' % path - smil_url = release_url + '&format=SMIL&formats=MPEG4&manifest=f4m' + smil_url = release_url + '&formats=MPEG4&manifest=f4m' else: - smil_url = 'http://link.theplatform.com/s/%s/meta.smil?format=smil&mbr=true' % path + smil_url = 'http://link.theplatform.com/s/%s?mbr=true' % path sig = smuggled_data.get('sig') if sig: @@ -253,7 +249,7 @@ class ThePlatformFeedIE(ThePlatformBaseIE): _TEST = { # From http://player.theplatform.com/p/7wvmTC/MSNBCEmbeddedOffSite?guid=n_hardball_5biden_140207 'url': 'http://feed.theplatform.com/f/7wvmTC/msnbc_video-p-test?form=json&pretty=true&range=-40&byGuid=n_hardball_5biden_140207', - 'md5': '22d2b84f058d3586efcd99e57d59d314', + 'md5': '6e32495b5073ab414471b615c5ded394', 'info_dict': { 'id': 'n_hardball_5biden_140207', 'ext': 'mp4', @@ -283,8 +279,8 @@ class ThePlatformFeedIE(ThePlatformBaseIE): first_video_id = None duration = None for item in entry['media$content']: - smil_url = item['plfile$url'] + '&format=SMIL&Tracking=true&Embedded=true&formats=MPEG4,F4M' - cur_video_id = url_basename(smil_url) + smil_url = item['plfile$url'] + '&mbr=true' + cur_video_id = ThePlatformIE._match_id(smil_url) if first_video_id is None: first_video_id = cur_video_id duration = float_or_none(item.get('plfile$duration'))