X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fbrightcove.py;h=2c7d968a848a2c9d4dbad8960aeac2e6f0b55cfd;hb=a2973eb59733c5f86a249c627d654b789020bc7d;hp=c1d4320e1ce73b6d9c5a9313eee61a7e05f9daf9;hpb=fff3455f58153879214c2f08b05de1555f9cc73a;p=youtube-dl
diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py
index c1d4320e1..2c7d968a8 100644
--- a/youtube_dl/extractor/brightcove.py
+++ b/youtube_dl/extractor/brightcove.py
@@ -3,16 +3,17 @@ from __future__ import unicode_literals
import re
import json
-import xml.etree.ElementTree
from .common import InfoExtractor
from ..compat import (
+ compat_etree_fromstring,
compat_parse_qs,
compat_str,
compat_urllib_parse,
compat_urllib_parse_urlparse,
compat_urllib_request,
compat_urlparse,
+ compat_xml_parse_error,
)
from ..utils import (
determine_ext,
@@ -21,6 +22,10 @@ from ..utils import (
fix_xml_ampersands,
unescapeHTML,
unsmuggle_url,
+ js_to_json,
+ int_or_none,
+ parse_iso8601,
+ extract_attributes,
)
@@ -118,8 +123,8 @@ class BrightcoveIE(InfoExtractor):
object_str = fix_xml_ampersands(object_str)
try:
- object_doc = xml.etree.ElementTree.fromstring(object_str.encode('utf-8'))
- except xml.etree.ElementTree.ParseError:
+ object_doc = compat_etree_fromstring(object_str.encode('utf-8'))
+ except compat_xml_parse_error:
return
fv_el = find_xpath_attr(object_doc, './param', 'name', 'flashVars')
@@ -156,6 +161,28 @@ class BrightcoveIE(InfoExtractor):
linkBase = find_param('linkBaseURL')
if linkBase is not None:
params['linkBaseURL'] = linkBase
+ return cls._make_brightcove_url(params)
+
+ @classmethod
+ def _build_brighcove_url_from_js(cls, object_js):
+ # The layout of JS is as follows:
+ # customBC.createVideo = function (width, height, playerID, playerKey, videoPlayer, VideoRandomID) {
+ # // build Brightcove XML
+ # }
+ m = re.search(
+ r'''(?x)customBC\.createVideo\(
+ .*? # skipping width and height
+ ["\'](?P<playerID>\d+)["\']\s*,\s* # playerID
+ ["\'](?P<playerKey>AQ[^"\']{48})[^"\']*["\']\s*,\s* # playerKey begins with AQ and is 50 characters
+ # in length, however it's appended to itself
+ # in places, so truncate
+ ["\'](?P<videoID>\d+)["\'] # @videoPlayer
+ ''', object_js)
+ if m:
+ return cls._make_brightcove_url(m.groupdict())
+
+ @classmethod
+ def _make_brightcove_url(cls, params):
data = compat_urllib_parse.urlencode(params)
return cls._FEDERATED_URL_TEMPLATE % data
@@ -188,7 +215,12 @@ class BrightcoveIE(InfoExtractor):
[^>]*?>\s*<param\s+name="movie"\s+value="https?://[^/]*brightcove\.com/
).+?>\s*</object>''',
webpage)
- return list(filter(None, [cls._build_brighcove_url(m) for m in matches]))
+ if matches:
+ return list(filter(None, [cls._build_brighcove_url(m) for m in matches]))
+
+ return list(filter(None, [
+ cls._build_brighcove_url_from_js(custom_bc)
+ for custom_bc in re.findall(r'(customBC\.createVideo\(.+?\);)', webpage)]))
def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, {})
@@ -318,3 +350,94 @@ class BrightcoveIE(InfoExtractor):
if 'url' not in info and not info.get('formats'):
raise ExtractorError('Unable to extract video url for %s' % info['id'])
return info
+
+
+class BrightcoveInPageEmbedIE(InfoExtractor):
+ _VALID_URL = r'https?://players\.brightcove\.net/(?P<account_id>\d+)/([a-z0-9-]+)_([a-z]+)/index.html?.*videoId=(?P<video_id>\d+)'
+ _TEST = {
+ 'url': 'http://players.brightcove.net/929656772001/e41d32dc-ec74-459e-a845-6c69f7b724ea_default/index.html?videoId=4463358922001',
+ 'md5': 'c8100925723840d4b0d243f7025703be',
+ 'info_dict': {
+ 'id': '4463358922001',
+ 'ext': 'mp4',
+ 'title': 'Meet the man behind Popcorn Time',
+ 'description': 'md5:eac376a4fe366edc70279bfb681aea16',
+ 'timestamp': 1441391203,
+ 'upload_date': '20150904',
+ 'duration': 165768,
+ 'uploader_id': '929656772001',
+ }
+ }
+
+ @staticmethod
+ def _extract_url(webpage):
+ video_attributes = re.search(r'(?s)