X-Git-Url: http://git.bitcoin.ninja/index.cgi?p=youtube-dl;a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fbrightcove.py;h=2aa9f4782e0dfdb2b78225c2d1fe83a8568effe3;hp=8e2f7217ab85a81a58d1bb902af02b6e62ec2ab6;hb=2391941f283a1107b01f9df76a8b0e521a5abe3b;hpb=f455a934e9a44aaebdf925b5d78ffafa39e14c97 diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index 8e2f7217a..2aa9f4782 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -5,32 +5,34 @@ import base64 import re import struct -from .common import InfoExtractor from .adobepass import AdobePassIE +from .common import InfoExtractor from ..compat import ( compat_etree_fromstring, + compat_HTTPError, compat_parse_qs, compat_urllib_parse_urlparse, compat_urlparse, compat_xml_parse_error, - compat_HTTPError, ) from ..utils import ( - ExtractorError, + clean_html, extract_attributes, + ExtractorError, find_xpath_attr, fix_xml_ampersands, float_or_none, - js_to_json, int_or_none, + js_to_json, + mimetype2ext, parse_iso8601, smuggle_url, + str_or_none, unescapeHTML, unsmuggle_url, - update_url_query, - clean_html, - mimetype2ext, UnsupportedError, + update_url_query, + url_or_none, ) @@ -424,7 +426,7 @@ class BrightcoveNewIE(AdobePassIE): # [2] looks like: for video, script_tag, account_id, player_id, embed in re.findall( r'''(?isx) - (]*\bdata-video-id\s*=\s*['"]?[^>]+>) + (]*\bdata-video-id\s*=\s*['"]?[^>]+>) (?:.*? (]+ src=["\'](?:https?:)?//players\.brightcove\.net/ @@ -553,10 +555,16 @@ class BrightcoveNewIE(AdobePassIE): subtitles = {} for text_track in json_data.get('text_tracks', []): - if text_track.get('src'): - subtitles.setdefault(text_track.get('srclang'), []).append({ - 'url': text_track['src'], - }) + if text_track.get('kind') != 'captions': + continue + text_track_url = url_or_none(text_track.get('src')) + if not text_track_url: + continue + lang = (str_or_none(text_track.get('srclang')) + or str_or_none(text_track.get('label')) or 'en').lower() + subtitles.setdefault(lang, []).append({ + 'url': text_track_url, + }) is_live = False duration = float_or_none(json_data.get('duration'), 1000) @@ -586,45 +594,63 @@ class BrightcoveNewIE(AdobePassIE): account_id, player_id, embed, content_type, video_id = re.match(self._VALID_URL, url).groups() - webpage = self._download_webpage( - 'http://players.brightcove.net/%s/%s_%s/index.min.js' - % (account_id, player_id, embed), video_id) + policy_key_id = '%s_%s' % (account_id, player_id) + policy_key = self._downloader.cache.load('brightcove', policy_key_id) + policy_key_extracted = False + store_pk = lambda x: self._downloader.cache.store('brightcove', policy_key_id, x) - policy_key = None + def extract_policy_key(): + webpage = self._download_webpage( + 'http://players.brightcove.net/%s/%s_%s/index.min.js' + % (account_id, player_id, embed), video_id) - catalog = self._search_regex( - r'catalog\(({.+?})\);', webpage, 'catalog', default=None) - if catalog: - catalog = self._parse_json( - js_to_json(catalog), video_id, fatal=False) + policy_key = None + + catalog = self._search_regex( + r'catalog\(({.+?})\);', webpage, 'catalog', default=None) if catalog: - policy_key = catalog.get('policyKey') + catalog = self._parse_json( + js_to_json(catalog), video_id, fatal=False) + if catalog: + policy_key = catalog.get('policyKey') + + if not policy_key: + policy_key = self._search_regex( + r'policyKey\s*:\s*(["\'])(?P.+?)\1', + webpage, 'policy key', group='pk') - if not policy_key: - policy_key = self._search_regex( - r'policyKey\s*:\s*(["\'])(?P.+?)\1', - webpage, 'policy key', group='pk') + store_pk(policy_key) + return policy_key api_url = 'https://edge.api.brightcove.com/playback/v1/accounts/%s/%ss/%s' % (account_id, content_type, video_id) - headers = { - 'Accept': 'application/json;pk=%s' % policy_key, - } + headers = {} referrer = smuggled_data.get('referrer') if referrer: headers.update({ 'Referer': referrer, 'Origin': re.search(r'https?://[^/]+', referrer).group(0), }) - try: - json_data = self._download_json(api_url, video_id, headers=headers) - except ExtractorError as e: - if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: - json_data = self._parse_json(e.cause.read().decode(), video_id)[0] - message = json_data.get('message') or json_data['error_code'] - if json_data.get('error_subcode') == 'CLIENT_GEO': - self.raise_geo_restricted(msg=message) - raise ExtractorError(message, expected=True) - raise + + for _ in range(2): + if not policy_key: + policy_key = extract_policy_key() + policy_key_extracted = True + headers['Accept'] = 'application/json;pk=%s' % policy_key + try: + json_data = self._download_json(api_url, video_id, headers=headers) + break + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code in (401, 403): + json_data = self._parse_json(e.cause.read().decode(), video_id)[0] + message = json_data.get('message') or json_data['error_code'] + if json_data.get('error_subcode') == 'CLIENT_GEO': + self.raise_geo_restricted(msg=message) + elif json_data.get('error_code') == 'INVALID_POLICY_KEY' and not policy_key_extracted: + policy_key = None + store_pk(None) + continue + raise ExtractorError(message, expected=True) + raise errors = json_data.get('errors') if errors and errors[0].get('error_subcode') == 'TVE_AUTH':