X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fbrightcove.py;h=ab62e54d63c2335d0002701c078105e00ccc6186;hb=54fc90aabfb71968f28af68dfe3f7a3544cc2f0b;hp=124497e95c7236b21d17419133cdb63cee55f955;hpb=72950c4dce413d4274323173dc3e7a2c17d93392;p=youtube-dl diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index 124497e95..ab62e54d6 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -5,6 +5,7 @@ import re import json from .common import InfoExtractor +from .adobepass import AdobePassIE from ..compat import ( compat_etree_fromstring, compat_parse_qs, @@ -131,6 +132,12 @@ class BrightcoveLegacyIE(InfoExtractor): }, 'playlist_mincount': 10, }, + { + # playerID inferred from bcpid + # from http://www.un.org/chinese/News/story.asp?NewsID=27724 + 'url': 'https://link.brightcove.com/services/player/bcpid1722935254001/?bctid=5360463607001&autoStart=false&secureConnections=true&width=650&height=350', + 'only_matching': True, # Tested in GenericIE + } ] FLV_VCODECS = { 1: 'SORENSON', @@ -266,9 +273,13 @@ class BrightcoveLegacyIE(InfoExtractor): if matches: return list(filter(None, [cls._build_brighcove_url(m) for m in matches])) - return list(filter(None, [ - cls._build_brighcove_url_from_js(custom_bc) - for custom_bc in re.findall(r'(customBC\.createVideo\(.+?\);)', webpage)])) + matches = re.findall(r'(customBC\.createVideo\(.+?\);)', webpage) + if matches: + return list(filter(None, [ + cls._build_brighcove_url_from_js(custom_bc) + for custom_bc in matches])) + return [src for _, src in re.findall( + r']+src=([\'"])((?:https?:)?//link\.brightcove\.com/services/player/(?!\1).+)\1', webpage)] def _real_extract(self, url): url, smuggled_data = unsmuggle_url(url, {}) @@ -285,6 +296,10 @@ class BrightcoveLegacyIE(InfoExtractor): if videoPlayer: # We set the original url as the default 'Referer' header referer = smuggled_data.get('Referer', url) + if 'playerID' not in query: + mobj = re.search(r'/bcpid(\d+)', url) + if mobj is not None: + query['playerID'] = [mobj.group(1)] return self._get_video_info( videoPlayer[0], query, referer=referer) elif 'playerKey' in query: @@ -434,7 +449,7 @@ class BrightcoveLegacyIE(InfoExtractor): return info -class BrightcoveNewIE(InfoExtractor): +class BrightcoveNewIE(AdobePassIE): IE_NAME = 'brightcove:new' _VALID_URL = r'https?://players\.brightcove\.net/(?P\d+)/(?P[^/]+)_(?P[^/]+)/index\.html\?.*videoId=(?P\d+|ref:[^&]+)' _TESTS = [{ @@ -449,7 +464,7 @@ class BrightcoveNewIE(InfoExtractor): 'timestamp': 1441391203, 'upload_date': '20150904', 'uploader_id': '929656772001', - 'formats': 'mincount:22', + 'formats': 'mincount:20', }, }, { # with rtmp streams @@ -463,7 +478,7 @@ class BrightcoveNewIE(InfoExtractor): 'timestamp': 1433556729, 'upload_date': '20150606', 'uploader_id': '4036320279001', - 'formats': 'mincount:41', + 'formats': 'mincount:39', }, 'params': { # m3u8 download @@ -484,8 +499,8 @@ class BrightcoveNewIE(InfoExtractor): }] @staticmethod - def _extract_url(webpage): - urls = BrightcoveNewIE._extract_urls(webpage) + def _extract_url(ie, webpage): + urls = BrightcoveNewIE._extract_urls(ie, webpage) return urls[0] if urls else None @staticmethod @@ -508,7 +523,7 @@ class BrightcoveNewIE(InfoExtractor): # [2] looks like: for video, script_tag, account_id, player_id, embed in re.findall( r'''(?isx) - (]+>) + (]*\bdata-video-id\s*=\s*['"]?[^>]+>) (?:.*? (]+ src=["\'](?:https?:)?//players\.brightcove\.net/ @@ -549,45 +564,7 @@ class BrightcoveNewIE(InfoExtractor): return entries - def _real_extract(self, url): - url, smuggled_data = unsmuggle_url(url, {}) - self._initialize_geo_bypass(smuggled_data.get('geo_countries')) - - account_id, player_id, embed, video_id = re.match(self._VALID_URL, url).groups() - - webpage = self._download_webpage( - 'http://players.brightcove.net/%s/%s_%s/index.min.js' - % (account_id, player_id, embed), video_id) - - policy_key = None - - catalog = self._search_regex( - r'catalog\(({.+?})\);', webpage, 'catalog', default=None) - if catalog: - catalog = self._parse_json( - js_to_json(catalog), video_id, fatal=False) - if catalog: - policy_key = catalog.get('policyKey') - - if not policy_key: - policy_key = self._search_regex( - r'policyKey\s*:\s*(["\'])(?P.+?)\1', - webpage, 'policy key', group='pk') - - api_url = 'https://edge.api.brightcove.com/playback/v1/accounts/%s/videos/%s' % (account_id, video_id) - try: - json_data = self._download_json(api_url, video_id, headers={ - 'Accept': 'application/json;pk=%s' % policy_key - }) - except ExtractorError as e: - if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: - json_data = self._parse_json(e.cause.read().decode(), video_id)[0] - message = json_data.get('message') or json_data['error_code'] - if json_data.get('error_subcode') == 'CLIENT_GEO': - self.raise_geo_restricted(msg=message) - raise ExtractorError(message, expected=True) - raise - + def _parse_brightcove_metadata(self, json_data, video_id, headers={}): title = json_data['name'].strip() formats = [] @@ -661,6 +638,9 @@ class BrightcoveNewIE(InfoExtractor): self._sort_formats(formats) + for f in formats: + f.setdefault('http_headers', {}).update(headers) + subtitles = {} for text_track in json_data.get('text_tracks', []): if text_track.get('src'): @@ -670,7 +650,7 @@ class BrightcoveNewIE(InfoExtractor): is_live = False duration = float_or_none(json_data.get('duration'), 1000) - if duration and duration < 0: + if duration is not None and duration <= 0: is_live = True return { @@ -680,9 +660,75 @@ class BrightcoveNewIE(InfoExtractor): 'thumbnail': json_data.get('thumbnail') or json_data.get('poster'), 'duration': duration, 'timestamp': parse_iso8601(json_data.get('published_at')), - 'uploader_id': account_id, + 'uploader_id': json_data.get('account_id'), 'formats': formats, 'subtitles': subtitles, 'tags': json_data.get('tags', []), 'is_live': is_live, } + + def _real_extract(self, url): + url, smuggled_data = unsmuggle_url(url, {}) + self._initialize_geo_bypass({ + 'countries': smuggled_data.get('geo_countries'), + 'ip_blocks': smuggled_data.get('geo_ip_blocks'), + }) + + account_id, player_id, embed, video_id = re.match(self._VALID_URL, url).groups() + + webpage = self._download_webpage( + 'http://players.brightcove.net/%s/%s_%s/index.min.js' + % (account_id, player_id, embed), video_id) + + policy_key = None + + catalog = self._search_regex( + r'catalog\(({.+?})\);', webpage, 'catalog', default=None) + if catalog: + catalog = self._parse_json( + js_to_json(catalog), video_id, fatal=False) + if catalog: + policy_key = catalog.get('policyKey') + + if not policy_key: + policy_key = self._search_regex( + r'policyKey\s*:\s*(["\'])(?P.+?)\1', + webpage, 'policy key', group='pk') + + api_url = 'https://edge.api.brightcove.com/playback/v1/accounts/%s/videos/%s' % (account_id, video_id) + headers = { + 'Accept': 'application/json;pk=%s' % policy_key, + } + referrer = smuggled_data.get('referrer') + if referrer: + headers.update({ + 'Referer': referrer, + 'Origin': re.search(r'https?://[^/]+', referrer).group(0), + }) + try: + json_data = self._download_json(api_url, video_id, headers=headers) + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: + json_data = self._parse_json(e.cause.read().decode(), video_id)[0] + message = json_data.get('message') or json_data['error_code'] + if json_data.get('error_subcode') == 'CLIENT_GEO': + self.raise_geo_restricted(msg=message) + raise ExtractorError(message, expected=True) + raise + + errors = json_data.get('errors') + if errors and errors[0].get('error_subcode') == 'TVE_AUTH': + custom_fields = json_data['custom_fields'] + tve_token = self._extract_mvpd_auth( + smuggled_data['source_url'], video_id, + custom_fields['bcadobepassrequestorid'], + custom_fields['bcadobepassresourceid']) + json_data = self._download_json( + api_url, video_id, headers={ + 'Accept': 'application/json;pk=%s' % policy_key + }, query={ + 'tveToken': tve_token, + }) + + return self._parse_brightcove_metadata( + json_data, video_id, headers=headers)