X-Git-Url: http://git.bitcoin.ninja/index.cgi?p=youtube-dl;a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fbrightcove.py;h=2aa9f4782e0dfdb2b78225c2d1fe83a8568effe3;hp=9553f82d663fd169b793c3a28fe8564b3ca5e9fc;hb=2391941f283a1107b01f9df76a8b0e521a5abe3b;hpb=75ef77c1b18e943933a635ba28a47ec4c9671504 diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index 9553f82d6..2aa9f4782 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -5,32 +5,34 @@ import base64 import re import struct -from .common import InfoExtractor from .adobepass import AdobePassIE +from .common import InfoExtractor from ..compat import ( compat_etree_fromstring, + compat_HTTPError, compat_parse_qs, compat_urllib_parse_urlparse, compat_urlparse, compat_xml_parse_error, - compat_HTTPError, ) from ..utils import ( - ExtractorError, + clean_html, extract_attributes, + ExtractorError, find_xpath_attr, fix_xml_ampersands, float_or_none, - js_to_json, int_or_none, + js_to_json, + mimetype2ext, parse_iso8601, smuggle_url, + str_or_none, unescapeHTML, unsmuggle_url, - update_url_query, - clean_html, - mimetype2ext, UnsupportedError, + update_url_query, + url_or_none, ) @@ -424,7 +426,7 @@ class BrightcoveNewIE(AdobePassIE): # [2] looks like: for video, script_tag, account_id, player_id, embed in re.findall( r'''(?isx) - (]*\bdata-video-id\s*=\s*['"]?[^>]+>) + (]*\bdata-video-id\s*=\s*['"]?[^>]+>) (?:.*? (]+ src=["\'](?:https?:)?//players\.brightcove\.net/ @@ -553,10 +555,16 @@ class BrightcoveNewIE(AdobePassIE): subtitles = {} for text_track in json_data.get('text_tracks', []): - if text_track.get('src'): - subtitles.setdefault(text_track.get('srclang'), []).append({ - 'url': text_track['src'], - }) + if text_track.get('kind') != 'captions': + continue + text_track_url = url_or_none(text_track.get('src')) + if not text_track_url: + continue + lang = (str_or_none(text_track.get('srclang')) + or str_or_none(text_track.get('label')) or 'en').lower() + subtitles.setdefault(lang, []).append({ + 'url': text_track_url, + }) is_live = False duration = float_or_none(json_data.get('duration'), 1000) @@ -588,11 +596,16 @@ class BrightcoveNewIE(AdobePassIE): policy_key_id = '%s_%s' % (account_id, player_id) policy_key = self._downloader.cache.load('brightcove', policy_key_id) - if not policy_key: + policy_key_extracted = False + store_pk = lambda x: self._downloader.cache.store('brightcove', policy_key_id, x) + + def extract_policy_key(): webpage = self._download_webpage( 'http://players.brightcove.net/%s/%s_%s/index.min.js' % (account_id, player_id, embed), video_id) + policy_key = None + catalog = self._search_regex( r'catalog\(({.+?})\);', webpage, 'catalog', default=None) if catalog: @@ -605,28 +618,39 @@ class BrightcoveNewIE(AdobePassIE): policy_key = self._search_regex( r'policyKey\s*:\s*(["\'])(?P.+?)\1', webpage, 'policy key', group='pk') - self._downloader.cache.store('brightcove', policy_key_id, policy_key) + + store_pk(policy_key) + return policy_key api_url = 'https://edge.api.brightcove.com/playback/v1/accounts/%s/%ss/%s' % (account_id, content_type, video_id) - headers = { - 'Accept': 'application/json;pk=%s' % policy_key, - } + headers = {} referrer = smuggled_data.get('referrer') if referrer: headers.update({ 'Referer': referrer, 'Origin': re.search(r'https?://[^/]+', referrer).group(0), }) - try: - json_data = self._download_json(api_url, video_id, headers=headers) - except ExtractorError as e: - if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: - json_data = self._parse_json(e.cause.read().decode(), video_id)[0] - message = json_data.get('message') or json_data['error_code'] - if json_data.get('error_subcode') == 'CLIENT_GEO': - self.raise_geo_restricted(msg=message) - raise ExtractorError(message, expected=True) - raise + + for _ in range(2): + if not policy_key: + policy_key = extract_policy_key() + policy_key_extracted = True + headers['Accept'] = 'application/json;pk=%s' % policy_key + try: + json_data = self._download_json(api_url, video_id, headers=headers) + break + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code in (401, 403): + json_data = self._parse_json(e.cause.read().decode(), video_id)[0] + message = json_data.get('message') or json_data['error_code'] + if json_data.get('error_subcode') == 'CLIENT_GEO': + self.raise_geo_restricted(msg=message) + elif json_data.get('error_code') == 'INVALID_POLICY_KEY' and not policy_key_extracted: + policy_key = None + store_pk(None) + continue + raise ExtractorError(message, expected=True) + raise errors = json_data.get('errors') if errors and errors[0].get('error_subcode') == 'TVE_AUTH':