X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Ftheplatform.py;h=a25417f94f846a70080c51661b57147e01e76b47;hb=e0741fd4496c85ef447e72df935cb6edd1af53ed;hp=ffe7c57adfb26b2bfc4ec4c30cbfa0c7cde93ac8;hpb=9f02ff537c6ddfd3f1ea3586f3e44f0ec07a2aea;p=youtube-dl diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py index ffe7c57ad..a25417f94 100644 --- a/youtube_dl/extractor/theplatform.py +++ b/youtube_dl/extractor/theplatform.py @@ -30,11 +30,10 @@ _x = lambda p: xpath_with_ns(p, {'smil': default_ns}) class ThePlatformBaseIE(OnceIE): def _extract_theplatform_smil(self, smil_url, video_id, note='Downloading SMIL data'): - meta = self._download_xml(smil_url, video_id, note=note) - error_element = find_xpath_attr( - meta, _x('.//smil:ref'), 'src', - 'http://link.theplatform.com/s/errorFiles/Unavailable.mp4') - if error_element is not None: + meta = self._download_xml(smil_url, video_id, note=note, query={'format': 'SMIL'}) + error_element = find_xpath_attr(meta, _x('.//smil:ref'), 'src') + if error_element is not None and error_element.attrib['src'].startswith( + 'http://link.theplatform.com/s/errorFiles/Unavailable.'): raise ExtractorError(error_element.attrib['abstract'], expected=True) smil_formats = self._parse_smil_formats( @@ -51,8 +50,6 @@ class ThePlatformBaseIE(OnceIE): else: formats.append(_format) - self._sort_formats(formats) - subtitles = self._parse_smil_subtitles(meta, default_ns) return formats, subtitles @@ -77,13 +74,15 @@ class ThePlatformBaseIE(OnceIE): 'description': info['description'], 'thumbnail': info['defaultThumbnailUrl'], 'duration': int_or_none(info.get('duration'), 1000), + 'timestamp': int_or_none(info.get('pubDate'), 1000) or None, + 'uploader': info.get('billingCode'), } class ThePlatformIE(ThePlatformBaseIE): _VALID_URL = r'''(?x) (?:https?://(?:link|player)\.theplatform\.com/[sp]/(?P[^/]+)/ - (?:(?P(?:(?:[^/]+/)+select/)?media/)|(?P(?:[^/\?]+/(?:swf|config)|onsite)/select/))? + (?:(?:(?:[^/]+/)+select/)?(?Pmedia/(?:guid/\d+/)?)|(?P(?:[^/\?]+/(?:swf|config)|onsite)/select/))? |theplatform:)(?P[^/\?&]+)''' _TESTS = [{ @@ -95,6 +94,9 @@ class ThePlatformIE(ThePlatformBaseIE): 'title': 'Blackberry\'s big, bold Z30', 'description': 'The Z30 is Blackberry\'s biggest, baddest mobile messaging device yet.', 'duration': 247, + 'timestamp': 1383239700, + 'upload_date': '20131031', + 'uploader': 'CBSI-NEW', }, 'params': { # rtmp download @@ -108,6 +110,9 @@ class ThePlatformIE(ThePlatformBaseIE): 'ext': 'flv', 'description': 'md5:ac330c9258c04f9d7512cf26b9595409', 'title': 'Tesla Model S: A second step towards a cleaner motoring future', + 'timestamp': 1426176191, + 'upload_date': '20150312', + 'uploader': 'CBSI-NEW', }, 'params': { # rtmp download @@ -120,6 +125,7 @@ class ThePlatformIE(ThePlatformBaseIE): 'ext': 'mp4', 'description': 'md5:644ad9188d655b742f942bf2e06b002d', 'title': 'HIGHLIGHTS: USA bag first ever series Cup win', + 'uploader': 'EGSM', } }, { 'url': 'http://player.theplatform.com/p/NnzsPC/widget/select/media/4Y0TlYUr_ZT7', @@ -136,6 +142,7 @@ class ThePlatformIE(ThePlatformBaseIE): 'thumbnail': 're:^https?://.*\.jpg$', 'timestamp': 1435752600, 'upload_date': '20150701', + 'uploader': 'NBCU-NEWS', }, }, { # From http://www.nbc.com/the-blacklist/video/sir-crispin-crandall/2928790?onid=137781#vc137781=1 @@ -152,11 +159,11 @@ class ThePlatformIE(ThePlatformBaseIE): def str_to_hex(str): return binascii.b2a_hex(str.encode('ascii')).decode('ascii') - def hex_to_str(hex): - return binascii.a2b_hex(hex) + def hex_to_bytes(hex): + return binascii.a2b_hex(hex.encode('ascii')) - relative_path = url.split('http://link.theplatform.com/s/')[1].split('?')[0] - clear_text = hex_to_str(flags + expiration_date + str_to_hex(relative_path)) + relative_path = re.match(r'https?://link.theplatform.com/s/([^?]+)', url).group(1) + clear_text = hex_to_bytes(flags + expiration_date + str_to_hex(relative_path)) checksum = hmac.new(sig_key.encode('ascii'), clear_text, hashlib.sha1).hexdigest() sig = flags + expiration_date + checksum + str_to_hex(sig_secret) return '%s&sig=%s' % (url, sig) @@ -171,10 +178,10 @@ class ThePlatformIE(ThePlatformBaseIE): if not provider_id: provider_id = 'dJ5BDC' - path = provider_id + path = provider_id + '/' if mobj.group('media'): - path += '/media' - path += '/' + video_id + path += mobj.group('media') + path += video_id qs_dict = compat_parse_qs(compat_urllib_parse_urlparse(url).query) if 'guid' in qs_dict: @@ -213,7 +220,7 @@ class ThePlatformIE(ThePlatformBaseIE): webpage, 'smil url', group='url') path = self._search_regex( r'link\.theplatform\.com/s/((?:[^/?#&]+/)+[^/?#&]+)', smil_url, 'path') - smil_url += '?' if '?' not in smil_url else '&' + 'formats=m3u,mpeg4&format=SMIL' + smil_url += '?' if '?' not in smil_url else '&' + 'formats=m3u,mpeg4' elif mobj.group('config'): config_url = url + '&form=json' config_url = config_url.replace('swf/', 'config/') @@ -223,15 +230,16 @@ class ThePlatformIE(ThePlatformBaseIE): release_url = config['releaseUrl'] else: release_url = 'http://link.theplatform.com/s/%s?mbr=true' % path - smil_url = release_url + '&format=SMIL&formats=MPEG4&manifest=f4m' + smil_url = release_url + '&formats=MPEG4&manifest=f4m' else: - smil_url = 'http://link.theplatform.com/s/%s/meta.smil?format=smil&mbr=true' % path + smil_url = 'http://link.theplatform.com/s/%s?mbr=true' % path sig = smuggled_data.get('sig') if sig: smil_url = self._sign_url(smil_url, sig['key'], sig['secret']) formats, subtitles = self._extract_theplatform_smil(smil_url, video_id) + self._sort_formats(formats) ret = self.get_metadata(path, video_id) combined_subtitles = self._merge_subtitles(ret.get('subtitles', {}), subtitles) @@ -261,6 +269,7 @@ class ThePlatformFeedIE(ThePlatformBaseIE): 'timestamp': 1391824260, 'duration': 467.0, 'categories': ['MSNBC/Issues/Democrats', 'MSNBC/Issues/Elections/Election 2016'], + 'uploader': 'NBCU-NEWS', }, } @@ -280,7 +289,7 @@ class ThePlatformFeedIE(ThePlatformBaseIE): first_video_id = None duration = None for item in entry['media$content']: - smil_url = item['plfile$url'] + '&format=SMIL&mbr=true' + smil_url = item['plfile$url'] + '&mbr=true' cur_video_id = ThePlatformIE._match_id(smil_url) if first_video_id is None: first_video_id = cur_video_id