X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fbrightcove.py;h=83eec84d3cd446b75854accd8dd8c2c754ba4349;hb=ea38e55fff639545394e32208a7dabc7e6258166;hp=e13c040f8bc24768d68ae13fa185ceaf0224d41d;hpb=d614aa40e35825e1cde7c92fc6092d226afe4898;p=youtube-dl diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index e13c040f8..83eec84d3 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -17,13 +17,13 @@ from ..utils import ( ExtractorError, unsmuggle_url, + unescapeHTML, ) class BrightcoveIE(InfoExtractor): _VALID_URL = r'https?://.*brightcove\.com/(services|viewer).*\?(?P.*)' _FEDERATED_URL_TEMPLATE = 'http://c.brightcove.com/services/viewer/htmlFederated?%s' - _PLAYLIST_URL_TEMPLATE = 'http://c.brightcove.com/services/json/experience/runtime/?command=get_programming_for_experience&playerKey=%s' _TESTS = [ { @@ -70,7 +70,7 @@ class BrightcoveIE(InfoExtractor): 'description': 'md5:363109c02998fee92ec02211bd8000df', 'uploader': 'National Ballet of Canada', }, - }, + } ] @classmethod @@ -90,9 +90,12 @@ class BrightcoveIE(InfoExtractor): object_doc = xml.etree.ElementTree.fromstring(object_str) fv_el = find_xpath_attr(object_doc, './param', 'name', 'flashVars') - flashvars = dict( - (k, v[0]) - for k, v in compat_parse_qs(fv_el.attrib['value']).items()) + if fv_el is not None: + flashvars = dict( + (k, v[0]) + for k, v in compat_parse_qs(fv_el.attrib['value']).items()) + else: + flashvars = {} def find_param(name): if name in flashvars: @@ -125,20 +128,28 @@ class BrightcoveIE(InfoExtractor): @classmethod def _extract_brightcove_url(cls, webpage): - """Try to extract the brightcove url from the wepbage, returns None + """Try to extract the brightcove url from the webpage, returns None if it can't be found """ - m_brightcove = re.search( + urls = cls._extract_brightcove_urls(webpage) + return urls[0] if urls else None + + @classmethod + def _extract_brightcove_urls(cls, webpage): + """Return a list of all Brightcove URLs from the webpage """ + + url_m = re.search(r']+?class=([\'"])[^>]*?BrightcoveExperience.*?\1 | + [^>]+?class=[\'"][^>]*?BrightcoveExperience.*?[\'"] | [^>]*?>\s*''', webpage) - if m_brightcove is not None: - return cls._build_brighcove_url(m_brightcove.group()) - else: - return None + return [cls._build_brighcove_url(m) for m in matches] def _real_extract(self, url): url, smuggled_data = unsmuggle_url(url, {}) @@ -180,8 +191,9 @@ class BrightcoveIE(InfoExtractor): return self._extract_video_info(video_info) def _get_playlist_info(self, player_key): - playlist_info = self._download_webpage(self._PLAYLIST_URL_TEMPLATE % player_key, - player_key, 'Downloading playlist information') + info_url = 'http://c.brightcove.com/services/json/experience/runtime/?command=get_programming_for_experience&playerKey=%s' % player_key + playlist_info = self._download_webpage( + info_url, player_key, 'Downloading playlist information') json_data = json.loads(playlist_info) if 'videoList' not in json_data: @@ -195,7 +207,7 @@ class BrightcoveIE(InfoExtractor): def _extract_video_info(self, video_info): info = { 'id': compat_str(video_info['id']), - 'title': video_info['displayName'], + 'title': video_info['displayName'].strip(), 'description': video_info.get('shortDescription'), 'thumbnail': video_info.get('videoStillURL') or video_info.get('thumbnailURL'), 'uploader': video_info.get('publisherName'),