X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Ftheplatform.py;h=0bf6726b53641734fd0fcafb73a76d8c3621b302;hb=72528252e303a084c4b95ae07c7a7213e53cad8a;hp=883bf491ca8e75729a12235ef31203db7f8216c7;hpb=05fe2594e4589b4e714a423550172eeec3949a70;p=youtube-dl
diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py
index 883bf491c..0bf6726b5 100644
--- a/youtube_dl/extractor/theplatform.py
+++ b/youtube_dl/extractor/theplatform.py
@@ -1,7 +1,7 @@
+# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import re
-import json
import time
import hmac
import binascii
@@ -16,11 +16,12 @@ from ..compat import (
from ..utils import (
determine_ext,
ExtractorError,
- xpath_with_ns,
- unsmuggle_url,
+ float_or_none,
int_or_none,
+ sanitized_Request,
+ unsmuggle_url,
url_basename,
- float_or_none,
+ xpath_with_ns,
)
default_ns = 'http://www.w3.org/2005/SMIL21/Language'
@@ -28,7 +29,7 @@ _x = lambda p: xpath_with_ns(p, {'smil': default_ns})
class ThePlatformBaseIE(InfoExtractor):
- def _extract_theplatform_smil_formats(self, smil_url, video_id, note='Downloading SMIL data'):
+ def _extract_theplatform_smil(self, smil_url, video_id, note='Downloading SMIL data'):
meta = self._download_xml(smil_url, video_id, note=note)
try:
error_msg = next(
@@ -54,12 +55,13 @@ class ThePlatformBaseIE(InfoExtractor):
self._sort_formats(formats)
- return formats
+ subtitles = self._parse_smil_subtitles(meta, default_ns)
+
+ return formats, subtitles
def get_metadata(self, path, video_id):
info_url = 'http://link.theplatform.com/s/%s?format=preview' % path
- info_json = self._download_webpage(info_url, video_id)
- info = json.loads(info_json)
+ info = self._download_json(info_url, video_id)
subtitles = {}
captions = info.get('captions')
@@ -138,6 +140,11 @@ class ThePlatformIE(ThePlatformBaseIE):
'upload_date': '20150701',
'categories': ['Today/Shows/Orange Room', 'Today/Sections/Money', 'Today/Topics/Tech', "Today/Topics/Editor's picks"],
},
+ }, {
+ # From http://www.nbc.com/the-blacklist/video/sir-crispin-crandall/2928790?onid=137781#vc137781=1
+ # geo-restricted (US), HLS encrypted with AES-128
+ 'url': 'http://player.theplatform.com/p/NnzsPC/onsite_universal/select/media/guid/2410887629/2928790?fwsitesection=nbc_the_blacklist_video_library&autoPlay=true&carouselID=137781',
+ 'only_matching': True,
}]
@staticmethod
@@ -181,8 +188,12 @@ class ThePlatformIE(ThePlatformBaseIE):
# Seems there's no pattern for the interested script filename, so
# I try one by one
for script in reversed(scripts):
- feed_script = self._download_webpage(script, video_id, 'Downloading feed script')
- feed_id = self._search_regex(r'defaultFeedId\s*:\s*"([^"]+)"', feed_script, 'default feed id', default=None)
+ feed_script = self._download_webpage(
+ self._proto_relative_url(script, 'http:'),
+ video_id, 'Downloading feed script')
+ feed_id = self._search_regex(
+ r'defaultFeedId\s*:\s*"([^"]+)"', feed_script,
+ 'default feed id', default=None)
if feed_id is not None:
break
if feed_id is None:
@@ -192,6 +203,20 @@ class ThePlatformIE(ThePlatformBaseIE):
if smuggled_data.get('force_smil_url', False):
smil_url = url
+ # Explicitly specified SMIL (see https://github.com/rg3/youtube-dl/issues/7385)
+ elif '/guid/' in url:
+ headers = {}
+ source_url = smuggled_data.get('source_url')
+ if source_url:
+ headers['Referer'] = source_url
+ request = sanitized_Request(url, headers=headers)
+ webpage = self._download_webpage(request, video_id)
+ smil_url = self._search_regex(
+ r'<link[^>]+href=(["\'])(?P<url>.+?)\1[^>]+type=["\']application/smil\+xml',
+ webpage, 'smil url', group='url')
+ path = self._search_regex(
+ r'link\.theplatform\.com/s/((?:[^/?#&]+/)+[^/?#&]+)', smil_url, 'path')
+ smil_url += '?' if '?' not in smil_url else '&' + 'formats=m3u,mpeg4&format=SMIL'
elif mobj.group('config'):
config_url = url + '&form=json'
config_url = config_url.replace('swf/', 'config/')
@@ -209,12 +234,14 @@ class ThePlatformIE(ThePlatformBaseIE):
if sig:
smil_url = self._sign_url(smil_url, sig['key'], sig['secret'])
- formats = self._extract_theplatform_smil_formats(smil_url, video_id)
+ formats, subtitles = self._extract_theplatform_smil(smil_url, video_id)
ret = self.get_metadata(path, video_id)
+ combined_subtitles = self._merge_subtitles(ret.get('subtitles', {}), subtitles)
ret.update({
'id': video_id,
'formats': formats,
+ 'subtitles': combined_subtitles,
})
return ret
@@ -252,6 +279,7 @@ class ThePlatformFeedIE(ThePlatformBaseIE):
entry = feed['entries'][0]
formats = []
+ subtitles = {}
first_video_id = None
duration = None
for item in entry['media$content']:
@@ -260,7 +288,9 @@ class ThePlatformFeedIE(ThePlatformBaseIE):
if first_video_id is None:
first_video_id = cur_video_id
duration = float_or_none(item.get('plfile$duration'))
- formats.extend(self._extract_theplatform_smil_formats(smil_url, video_id, 'Downloading SMIL data for %s' % cur_video_id))
+ cur_formats, cur_subtitles = self._extract_theplatform_smil(smil_url, video_id, 'Downloading SMIL data for %s' % cur_video_id)
+ formats.extend(cur_formats)
+ subtitles = self._merge_subtitles(subtitles, cur_subtitles)
self._sort_formats(formats)
@@ -274,9 +304,11 @@ class ThePlatformFeedIE(ThePlatformBaseIE):
categories = [item['media$name'] for item in entry.get('media$categories', [])]
ret = self.get_metadata('%s/%s' % (provider_id, first_video_id), video_id)
+ subtitles = self._merge_subtitles(subtitles, ret['subtitles'])
ret.update({
'id': video_id,
'formats': formats,
+ 'subtitles': subtitles,
'thumbnails': thumbnails,
'duration': duration,
'timestamp': timestamp,