X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Ftheplatform.py;h=bb3efc4ea17e6f4d0ae2252d7e6a317cd6bf7f03;hb=d993a1354def6c81f1f267cb2bfe02c478336ba1;hp=6da701a39fff3ebec1bf9aa0cdf095b904a83cf4;hpb=79ba9140dc8fcf5883b7473596e8f20cba6b479f;p=youtube-dl diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py index 6da701a39..bb3efc4ea 100644 --- a/youtube_dl/extractor/theplatform.py +++ b/youtube_dl/extractor/theplatform.py @@ -6,6 +6,7 @@ import time import hmac import binascii import hashlib +import netrc from .once import OnceIE @@ -14,14 +15,19 @@ from ..compat import ( compat_urllib_parse_urlparse, ) from ..utils import ( + determine_ext, ExtractorError, float_or_none, int_or_none, sanitized_Request, unsmuggle_url, + update_url_query, xpath_with_ns, mimetype2ext, find_xpath_attr, + unescapeHTML, + urlencode_postdata, + unified_timestamp, ) default_ns = 'http://www.w3.org/2005/SMIL21/Language' @@ -48,18 +54,23 @@ class ThePlatformBaseIE(OnceIE): if OnceIE.suitable(_format['url']): formats.extend(self._extract_once_formats(_format['url'])) else: - formats.append(_format) + media_url = _format['url'] + if determine_ext(media_url) == 'm3u8': + hdnea2 = self._get_cookies(media_url).get('hdnea2') + if hdnea2: + _format['url'] = update_url_query(media_url, {'hdnea3': hdnea2.value}) - self._sort_formats(formats) + formats.append(_format) subtitles = self._parse_smil_subtitles(meta, default_ns) return formats, subtitles - def get_metadata(self, path, video_id): + def _download_theplatform_metadata(self, path, video_id): info_url = 'http://link.theplatform.com/s/%s?format=preview' % path - info = self._download_json(info_url, video_id) + return self._download_json(info_url, video_id) + def _parse_theplatform_metadata(self, info): subtitles = {} captions = info.get('captions') if isinstance(captions, list): @@ -80,6 +91,10 @@ class ThePlatformBaseIE(OnceIE): 'uploader': info.get('billingCode'), } + def _extract_theplatform_metadata(self, path, video_id): + info = self._download_theplatform_metadata(path, video_id) + return self._parse_theplatform_metadata(info) + class ThePlatformIE(ThePlatformBaseIE): _VALID_URL = r'''(?x) @@ -152,6 +167,23 @@ class ThePlatformIE(ThePlatformBaseIE): 'url': 'http://player.theplatform.com/p/NnzsPC/onsite_universal/select/media/guid/2410887629/2928790?fwsitesection=nbc_the_blacklist_video_library&autoPlay=true&carouselID=137781', 'only_matching': True, }] + _SERVICE_PROVIDER_TEMPLATE = 'https://sp.auth.adobe.com/adobe-services/%s' + + @classmethod + def _extract_urls(cls, webpage): + m = re.search( + r'''(?x) + https?://player\.theplatform\.com/p/.+?)\2 + ''', webpage) + if m: + return [m.group('url')] + + matches = re.findall( + r'<(?:iframe|script)[^>]+src=(["\'])((?:https?:)?//player\.theplatform\.com/p/.+?)\1', webpage) + if matches: + return list(zip(*matches))[1] @staticmethod def _sign_url(url, sig_key, sig_secret, life=600, include_qs=False): @@ -161,15 +193,105 @@ class ThePlatformIE(ThePlatformBaseIE): def str_to_hex(str): return binascii.b2a_hex(str.encode('ascii')).decode('ascii') - def hex_to_str(hex): - return binascii.a2b_hex(hex) + def hex_to_bytes(hex): + return binascii.a2b_hex(hex.encode('ascii')) relative_path = re.match(r'https?://link.theplatform.com/s/([^?]+)', url).group(1) - clear_text = hex_to_str(flags + expiration_date + str_to_hex(relative_path)) + clear_text = hex_to_bytes(flags + expiration_date + str_to_hex(relative_path)) checksum = hmac.new(sig_key.encode('ascii'), clear_text, hashlib.sha1).hexdigest() sig = flags + expiration_date + checksum + str_to_hex(sig_secret) return '%s&sig=%s' % (url, sig) + def _extract_mvpd_auth(self, url, video_id, requestor_id, resource): + def xml_text(xml_str, tag): + return self._search_regex( + '<%s>(.+?)%s>' % (tag, tag), xml_str, tag) + + mvpd_headers = { + 'ap_42': 'anonymous', + 'ap_11': 'Linux i686', + 'ap_z': 'Mozilla/5.0 (X11; Linux i686; rv:47.0) Gecko/20100101 Firefox/47.0', + 'User-Agent': 'Mozilla/5.0 (X11; Linux i686; rv:47.0) Gecko/20100101 Firefox/47.0', + } + + guid = xml_text(resource, 'guid') + requestor_info = self._downloader.cache.load('mvpd', requestor_id) or {} + authn_token = requestor_info.get('authn_token') + if authn_token: + token_expires = unified_timestamp(xml_text(authn_token, 'simpleTokenExpires').replace('_GMT', '')) + if token_expires and token_expires >= time.time(): + authn_token = None + if not authn_token: + # TODO add support for other TV Providers + mso_id = 'DTV' + login_info = netrc.netrc().authenticators(mso_id) + if not login_info: + return None + + def post_form(form_page, note, data={}): + post_url = self._html_search_regex(r'