X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fbrightcove.py;h=0d162d337ac19a7f60ff5c5495e9c2f9e0e17799;hb=83cedc1cf224206adf513f5bdd5f5ce915d67933;hp=9760e606f23481afb7376bdf1f7552831b2c06c1;hpb=8ac73bdbe44e14007fb3e630eb70e2a395133480;p=youtube-dl diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index 9760e606f..0d162d337 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -9,7 +9,6 @@ from ..compat import ( compat_etree_fromstring, compat_parse_qs, compat_str, - compat_urllib_parse, compat_urllib_parse_urlparse, compat_urlparse, compat_xml_parse_error, @@ -24,16 +23,16 @@ from ..utils import ( js_to_json, int_or_none, parse_iso8601, - sanitized_Request, unescapeHTML, unsmuggle_url, + update_url_query, ) class BrightcoveLegacyIE(InfoExtractor): IE_NAME = 'brightcove:legacy' _VALID_URL = r'(?:https?://.*brightcove\.com/(services|viewer).*?\?|brightcove:)(?P.*)' - _FEDERATED_URL_TEMPLATE = 'http://c.brightcove.com/services/viewer/htmlFederated?%s' + _FEDERATED_URL = 'http://c.brightcove.com/services/viewer/htmlFederated' _TESTS = [ { @@ -137,13 +136,16 @@ class BrightcoveLegacyIE(InfoExtractor): else: flashvars = {} + data_url = object_doc.attrib.get('data', '') + data_url_params = compat_parse_qs(compat_urllib_parse_urlparse(data_url).query) + def find_param(name): if name in flashvars: return flashvars[name] node = find_xpath_attr(object_doc, './param', 'name', name) if node is not None: return node.attrib['value'] - return None + return data_url_params.get(name) params = {} @@ -156,8 +158,8 @@ class BrightcoveLegacyIE(InfoExtractor): # Not all pages define this value if playerKey is not None: params['playerKey'] = playerKey - # The three fields hold the id of the video - videoPlayer = find_param('@videoPlayer') or find_param('videoId') or find_param('videoID') + # These fields hold the id of the video + videoPlayer = find_param('@videoPlayer') or find_param('videoId') or find_param('videoID') or find_param('@videoList') if videoPlayer is not None: params['@videoPlayer'] = videoPlayer linkBase = find_param('linkBaseURL') @@ -185,8 +187,7 @@ class BrightcoveLegacyIE(InfoExtractor): @classmethod def _make_brightcove_url(cls, params): - data = compat_urllib_parse.urlencode(params) - return cls._FEDERATED_URL_TEMPLATE % data + return update_url_query(cls._FEDERATED_URL, params) @classmethod def _extract_brightcove_url(cls, webpage): @@ -240,7 +241,7 @@ class BrightcoveLegacyIE(InfoExtractor): # We set the original url as the default 'Referer' header referer = smuggled_data.get('Referer', url) return self._get_video_info( - videoPlayer[0], query_str, query, referer=referer) + videoPlayer[0], query, referer=referer) elif 'playerKey' in query: player_key = query['playerKey'] return self._get_playlist_info(player_key[0]) @@ -249,15 +250,14 @@ class BrightcoveLegacyIE(InfoExtractor): 'Cannot find playerKey= variable. Did you forget quotes in a shell invocation?', expected=True) - def _get_video_info(self, video_id, query_str, query, referer=None): - request_url = self._FEDERATED_URL_TEMPLATE % query_str - req = sanitized_Request(request_url) + def _get_video_info(self, video_id, query, referer=None): + headers = {} linkBase = query.get('linkBaseURL') if linkBase is not None: referer = linkBase[0] if referer is not None: - req.add_header('Referer', referer) - webpage = self._download_webpage(req, video_id) + headers['Referer'] = referer + webpage = self._download_webpage(self._FEDERATED_URL, video_id, headers=headers, query=query) error_msg = self._html_search_regex( r"

We're sorry.

([\s\n]*

.*?

)+", webpage, @@ -297,7 +297,7 @@ class BrightcoveLegacyIE(InfoExtractor): 'uploader': video_info.get('publisherName'), } - renditions = video_info.get('renditions') + renditions = video_info.get('renditions', []) + video_info.get('IOSRenditions', []) if renditions: formats = [] for rend in renditions: @@ -319,13 +319,23 @@ class BrightcoveLegacyIE(InfoExtractor): if ext is None: ext = determine_ext(url) size = rend.get('size') - formats.append({ + a_format = { 'url': url, 'ext': ext, 'height': rend.get('frameHeight'), 'width': rend.get('frameWidth'), 'filesize': size if size != 0 else None, - }) + } + + # m3u8 manifests with remote == false are media playlists + # Not calling _extract_m3u8_formats here to save network traffic + if ext == 'm3u8': + a_format.update({ + 'ext': 'mp4', + 'protocol': 'm3u8', + }) + + formats.append(a_format) self._sort_formats(formats) info['formats'] = formats elif video_info.get('FLVFullLengthURL') is not None: @@ -415,8 +425,8 @@ class BrightcoveNewIE(InfoExtractor): # Look for iframe embeds [1] for _, url in re.findall( - r']+src=(["\'])((?:https?:)//players\.brightcove\.net/\d+/[^/]+/index\.html.+?)\1', webpage): - entries.append(url) + r']+src=(["\'])((?:https?:)?//players\.brightcove\.net/\d+/[^/]+/index\.html.+?)\1', webpage): + entries.append(url if url.startswith('http') else 'http:' + url) # Look for embed_in_page embeds [2] for video_id, account_id, player_id, embed in re.findall( @@ -425,11 +435,11 @@ class BrightcoveNewIE(InfoExtractor): # According to [4] data-video-id may be prefixed with ref: r'''(?sx) ]+ - data-video-id=["\']((?:ref:)?\d+)["\'][^>]*>.*? + data-video-id=["\'](\d+|ref:[^"\']+)["\'][^>]*>.*? .*? ]+ src=["\'](?:https?:)?//players\.brightcove\.net/ - (\d+)/([\da-f-]+)_([^/]+)/index(?:\.min)?\.js + (\d+)/([^/]+)_([^/]+)/index(?:\.min)?\.js ''', webpage): entries.append( 'http://players.brightcove.net/%s/%s_%s/index.html?videoId=%s' @@ -459,12 +469,11 @@ class BrightcoveNewIE(InfoExtractor): r'policyKey\s*:\s*(["\'])(?P.+?)\1', webpage, 'policy key', group='pk') - req = sanitized_Request( - 'https://edge.api.brightcove.com/playback/v1/accounts/%s/videos/%s' - % (account_id, video_id), - headers={'Accept': 'application/json;pk=%s' % policy_key}) + api_url = 'https://edge.api.brightcove.com/playback/v1/accounts/%s/videos/%s' % (account_id, video_id) try: - json_data = self._download_json(req, video_id) + json_data = self._download_json(api_url, video_id, headers={ + 'Accept': 'application/json;pk=%s' % policy_key + }) except ExtractorError as e: if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: json_data = self._parse_json(e.cause.read().decode(), video_id) @@ -475,14 +484,18 @@ class BrightcoveNewIE(InfoExtractor): formats = [] for source in json_data.get('sources', []): + container = source.get('container') source_type = source.get('type') src = source.get('src') - if source_type == 'application/x-mpegURL': + if source_type == 'application/x-mpegURL' or container == 'M2TS': if not src: continue formats.extend(self._extract_m3u8_formats( - src, video_id, 'mp4', entry_protocol='m3u8_native', - m3u8_id='hls', fatal=False)) + src, video_id, 'mp4', m3u8_id='hls', fatal=False)) + elif source_type == 'application/dash+xml': + if not src: + continue + formats.extend(self._extract_mpd_formats(src, video_id, 'dash', fatal=False)) else: streaming_src = source.get('streaming_src') stream_name, app_name = source.get('stream_name'), source.get('app_name') @@ -490,15 +503,23 @@ class BrightcoveNewIE(InfoExtractor): continue tbr = float_or_none(source.get('avg_bitrate'), 1000) height = int_or_none(source.get('height')) + width = int_or_none(source.get('width')) f = { 'tbr': tbr, - 'width': int_or_none(source.get('width')), - 'height': height, 'filesize': int_or_none(source.get('size')), - 'container': source.get('container'), - 'vcodec': source.get('codec'), - 'ext': source.get('container').lower(), + 'container': container, + 'ext': container.lower(), } + if width == 0 and height == 0: + f.update({ + 'vcodec': 'none', + }) + else: + f.update({ + 'width': width, + 'height': height, + 'vcodec': source.get('codec'), + }) def build_format_id(kind): format_id = kind