-# -*- coding: utf-8 -*-
+# coding: utf-8
from __future__ import unicode_literals
import re
import hmac
import binascii
import hashlib
-import netrc
from .once import OnceIE
+from .adobepass import AdobePassIE
from ..compat import (
compat_parse_qs,
compat_urllib_parse_urlparse,
xpath_with_ns,
mimetype2ext,
find_xpath_attr,
- unescapeHTML,
- urlencode_postdata,
- unified_timestamp,
)
default_ns = 'http://www.w3.org/2005/SMIL21/Language'
class ThePlatformBaseIE(OnceIE):
def _extract_theplatform_smil(self, smil_url, video_id, note='Downloading SMIL data'):
- meta = self._download_xml(smil_url, video_id, note=note, query={'format': 'SMIL'})
+ meta = self._download_xml(
+ smil_url, video_id, note=note, query={'format': 'SMIL'},
+ headers=self.geo_verification_headers())
error_element = find_xpath_attr(meta, _x('.//smil:ref'), 'src')
if error_element is not None and error_element.attrib['src'].startswith(
'http://link.theplatform.com/s/errorFiles/Unavailable.'):
if isinstance(captions, list):
for caption in captions:
lang, src, mime = caption.get('lang', 'en'), caption.get('src'), caption.get('type')
- subtitles[lang] = [{
+ subtitles.setdefault(lang, []).append({
'ext': mimetype2ext(mime),
'url': src,
- }]
+ })
+
+ duration = info.get('duration')
+ tp_chapters = info.get('chapters', [])
+ chapters = []
+ if tp_chapters:
+ def _add_chapter(start_time, end_time):
+ start_time = float_or_none(start_time, 1000)
+ end_time = float_or_none(end_time, 1000)
+ if start_time is None or end_time is None:
+ return
+ chapters.append({
+ 'start_time': start_time,
+ 'end_time': end_time,
+ })
+
+ for chapter in tp_chapters[:-1]:
+ _add_chapter(chapter.get('startTime'), chapter.get('endTime'))
+ _add_chapter(tp_chapters[-1].get('startTime'), tp_chapters[-1].get('endTime') or duration)
return {
'title': info['title'],
'subtitles': subtitles,
'description': info['description'],
'thumbnail': info['defaultThumbnailUrl'],
- 'duration': int_or_none(info.get('duration'), 1000),
+ 'duration': float_or_none(duration, 1000),
'timestamp': int_or_none(info.get('pubDate'), 1000) or None,
'uploader': info.get('billingCode'),
+ 'chapters': chapters,
}
def _extract_theplatform_metadata(self, path, video_id):
return self._parse_theplatform_metadata(info)
-class ThePlatformIE(ThePlatformBaseIE):
+class ThePlatformIE(ThePlatformBaseIE, AdobePassIE):
_VALID_URL = r'''(?x)
(?:https?://(?:link|player)\.theplatform\.com/[sp]/(?P<provider_id>[^/]+)/
- (?:(?:(?:[^/]+/)+select/)?(?P<media>media/(?:guid/\d+/)?)|(?P<config>(?:[^/\?]+/(?:swf|config)|onsite)/select/))?
+ (?:(?:(?:[^/]+/)+select/)?(?P<media>media/(?:guid/\d+/)?)?|(?P<config>(?:[^/\?]+/(?:swf|config)|onsite)/select/))?
|theplatform:)(?P<id>[^/\?&]+)'''
_TESTS = [{
# rtmp download
'skip_download': True,
},
+ 'skip': '404 Not Found',
}, {
# from http://www.cnet.com/videos/tesla-model-s-a-second-step-towards-a-cleaner-motoring-future/
'url': 'http://link.theplatform.com/s/kYEXFC/22d_qsQ6MIRT',
'title': 'iPhone Siri’s sassy response to a math question has people talking',
'description': 'md5:a565d1deadd5086f3331d57298ec6333',
'duration': 83.0,
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'timestamp': 1435752600,
'upload_date': '20150701',
'uploader': 'NBCU-NEWS',
'url': 'http://player.theplatform.com/p/NnzsPC/onsite_universal/select/media/guid/2410887629/2928790?fwsitesection=nbc_the_blacklist_video_library&autoPlay=true&carouselID=137781',
'only_matching': True,
}]
- _SERVICE_PROVIDER_TEMPLATE = 'https://sp.auth.adobe.com/adobe-services/%s'
@classmethod
def _extract_urls(cls, webpage):
if m:
return [m.group('url')]
+ # Are whitesapces ignored in URLs?
+ # https://github.com/rg3/youtube-dl/issues/12044
matches = re.findall(
- r'<(?:iframe|script)[^>]+src=(["\'])((?:https?:)?//player\.theplatform\.com/p/.+?)\1', webpage)
+ r'(?s)<(?:iframe|script)[^>]+src=(["\'])((?:https?:)?//player\.theplatform\.com/p/.+?)\1', webpage)
if matches:
- return list(zip(*matches))[1]
+ return [re.sub(r'\s', '', list(zip(*matches))[1][0])]
@staticmethod
def _sign_url(url, sig_key, sig_secret, life=600, include_qs=False):
def hex_to_bytes(hex):
return binascii.a2b_hex(hex.encode('ascii'))
- relative_path = re.match(r'https?://link.theplatform.com/s/([^?]+)', url).group(1)
+ relative_path = re.match(r'https?://link\.theplatform\.com/s/([^?]+)', url).group(1)
clear_text = hex_to_bytes(flags + expiration_date + str_to_hex(relative_path))
checksum = hmac.new(sig_key.encode('ascii'), clear_text, hashlib.sha1).hexdigest()
sig = flags + expiration_date + checksum + str_to_hex(sig_secret)
return '%s&sig=%s' % (url, sig)
- def _extract_mvpd_auth(self, url, video_id, requestor_id, resource):
- def xml_text(xml_str, tag):
- return self._search_regex(
- '<%s>(.+?)</%s>' % (tag, tag), xml_str, tag)
-
- mvpd_headers = {
- 'ap_42': 'anonymous',
- 'ap_11': 'Linux i686',
- 'ap_z': 'Mozilla/5.0 (X11; Linux i686; rv:47.0) Gecko/20100101 Firefox/47.0',
- 'User-Agent': 'Mozilla/5.0 (X11; Linux i686; rv:47.0) Gecko/20100101 Firefox/47.0',
- }
-
- guid = xml_text(resource, 'guid')
- requestor_info = self._downloader.cache.load('mvpd', requestor_id) or {}
- authn_token = requestor_info.get('authn_token')
- if authn_token:
- token_expires = unified_timestamp(xml_text(authn_token, 'simpleTokenExpires').replace('_GMT', ''))
- if token_expires and token_expires >= time.time():
- authn_token = None
- if not authn_token:
- # TODO add support for other TV Providers
- mso_id = 'DTV'
- login_info = netrc.netrc().authenticators(mso_id)
- if not login_info:
- return None
-
- def post_form(form_page, note, data={}):
- post_url = self._html_search_regex(r'<form[^>]+action=(["\'])(?P<url>.+?)\1', form_page, 'post url', group='url')
- return self._download_webpage(
- post_url, video_id, note, data=urlencode_postdata(data or self._hidden_inputs(form_page)), headers={
- 'Content-Type': 'application/x-www-form-urlencoded',
- })
-
- provider_redirect_page = self._download_webpage(
- self._SERVICE_PROVIDER_TEMPLATE % 'authenticate/saml', video_id,
- 'Downloading Provider Redirect Page', query={
- 'noflash': 'true',
- 'mso_id': mso_id,
- 'requestor_id': requestor_id,
- 'no_iframe': 'false',
- 'domain_name': 'adobe.com',
- 'redirect_url': url,
- })
- provider_login_page = post_form(
- provider_redirect_page, 'Downloading Provider Login Page')
- mvpd_confirm_page = post_form(provider_login_page, 'Logging in', {
- 'username': login_info[0],
- 'password': login_info[2],
- })
- post_form(mvpd_confirm_page, 'Confirming Login')
-
- session = self._download_webpage(
- self._SERVICE_PROVIDER_TEMPLATE % 'session', video_id,
- 'Retrieving Session', data=urlencode_postdata({
- '_method': 'GET',
- 'requestor_id': requestor_id,
- }), headers=mvpd_headers)
- authn_token = unescapeHTML(xml_text(session, 'authnToken'))
- requestor_info['authn_token'] = authn_token
- self._downloader.cache.store('mvpd', requestor_id, requestor_info)
-
- authz_token = requestor_info.get(guid)
- if not authz_token:
- authorize = self._download_webpage(
- self._SERVICE_PROVIDER_TEMPLATE % 'authorize', video_id,
- 'Retrieving Authorization Token', data=urlencode_postdata({
- 'resource_id': resource,
- 'requestor_id': requestor_id,
- 'authentication_token': authn_token,
- 'mso_id': xml_text(authn_token, 'simpleTokenMsoID'),
- 'userMeta': '1',
- }), headers=mvpd_headers)
- authz_token = unescapeHTML(xml_text(authorize, 'authzToken'))
- requestor_info[guid] = authz_token
- self._downloader.cache.store('mvpd', requestor_id, requestor_info)
-
- mvpd_headers.update({
- 'ap_19': xml_text(authn_token, 'simpleSamlNameID'),
- 'ap_23': xml_text(authn_token, 'simpleSamlSessionIndex'),
- })
-
- return self._download_webpage(
- self._SERVICE_PROVIDER_TEMPLATE % 'shortAuthorize',
- video_id, 'Retrieving Media Token', data=urlencode_postdata({
- 'authz_token': authz_token,
- 'requestor_id': requestor_id,
- 'session_guid': xml_text(authn_token, 'simpleTokenAuthenticationGuid'),
- 'hashed_guid': 'false',
- }), headers=mvpd_headers)
-
def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, {})
'ext': 'mp4',
'title': 'The Biden factor: will Joe run in 2016?',
'description': 'Could Vice President Joe Biden be preparing a 2016 campaign? Mark Halperin and Sam Stein weigh in.',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'upload_date': '20140208',
'timestamp': 1391824260,
'duration': 467.0,
},
}]
- def _extract_feed_info(self, provider_id, feed_id, filter_query, video_id, custom_fields=None, asset_types_query={}):
+ def _extract_feed_info(self, provider_id, feed_id, filter_query, video_id, custom_fields=None, asset_types_query={}, account_id=None):
real_url = self._URL_TEMPLATE % (self.http_scheme(), provider_id, feed_id, filter_query)
entry = self._download_json(real_url, video_id)['entries'][0]
+ main_smil_url = 'http://link.theplatform.com/s/%s/media/guid/%d/%s' % (provider_id, account_id, entry['guid']) if account_id else None
formats = []
subtitles = {}
if asset_type in asset_types_query:
query.update(asset_types_query[asset_type])
cur_formats, cur_subtitles = self._extract_theplatform_smil(update_url_query(
- smil_url, query), video_id, 'Downloading SMIL data for %s' % asset_type)
+ main_smil_url or smil_url, query), video_id, 'Downloading SMIL data for %s' % asset_type)
formats.extend(cur_formats)
subtitles = self._merge_subtitles(subtitles, cur_subtitles)