[brightcove] Set the 'Referer' header if the URL has the 'linkBaseURL' parameter...
author: Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>
Thu, 7 Nov 2013 20:06:48 +0000 (21:06 +0100)
committer: Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>
Thu, 7 Nov 2013 20:06:48 +0000 (21:06 +0100)
youtube_dl/extractor/brightcove.py
youtube_dl/extractor/generic.py

index 0e60271f15935f035096cb4d8d67acd5a46c405f..d8c35465a34fa4c4d4ca822d499892504a51ce62 100644 (file)
@@ -10,10 +10,12 @@ from ..utils import (
     find_xpath_attr,
     compat_urlparse,
     compat_str,
+    compat_urllib_request,
 
     ExtractorError,
 )
 
+
 class BrightcoveIE(InfoExtractor):
     _VALID_URL = r'https?://.*brightcove\.com/(services|viewer).*\?(?P<query>.*)'
     _FEDERATED_URL_TEMPLATE = 'http://c.brightcove.com/services/viewer/htmlFederated?%s'
@@ -80,6 +82,9 @@ class BrightcoveIE(InfoExtractor):
         videoPlayer = find_xpath_attr(object_doc, './param', 'name', '@videoPlayer')
         if videoPlayer is not None:
             params['@videoPlayer'] = videoPlayer.attrib['value']
+        linkBase = find_xpath_attr(object_doc, './param', 'name', 'linkBaseURL')
+        if linkBase is not None:
+            params['linkBaseURL'] = linkBase.attrib['value']
         data = compat_urllib_parse.urlencode(params)
         return cls._FEDERATED_URL_TEMPLATE % data
 
@@ -107,14 +112,18 @@ class BrightcoveIE(InfoExtractor):
 
         videoPlayer = query.get('@videoPlayer')
         if videoPlayer:
-            return self._get_video_info(videoPlayer[0], query_str)
+            return self._get_video_info(videoPlayer[0], query_str, query)
         else:
             player_key = query['playerKey']
             return self._get_playlist_info(player_key[0])
 
-    def _get_video_info(self, video_id, query):
-        request_url = self._FEDERATED_URL_TEMPLATE % query
-        webpage = self._download_webpage(request_url, video_id)
+    def _get_video_info(self, video_id, query_str, query):
+        request_url = self._FEDERATED_URL_TEMPLATE % query_str
+        req = compat_urllib_request.Request(request_url)
+        linkBase = query.get('linkBaseURL')
+        if linkBase is not None:
+            req.add_header('Referer', linkBase[0])
+        webpage = self._download_webpage(req, video_id)
 
         self.report_extraction(video_id)
         info = self._search_regex(r'var experienceJSON = ({.*?});', webpage, 'json')
index 04b7212f4b5406d09357f726391a7961be0a48bf..c7552fddb587a60454bec6faa174c36bd4aa9a4a 100644 (file)
@@ -55,15 +55,17 @@ class GenericIE(InfoExtractor):
             u'skip': u'There is a limit of 200 free downloads / month for the test song',
         },
         # embedded brightcove video
+        # it also tests brightcove videos that need to set the 'Referer' in the
+        # http requests
         {
             u'add_ie': ['Brightcove'],
-            u'url': u'http://www.scientificamerican.com/article.cfm?id=soap-bubble-physics',
+            u'url': u'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/',
             u'info_dict': {
-                u'id': u'2365799484001',
+                u'id': u'2765128793001',
                 u'ext': u'mp4',
-                u'title': u'Bubble Simulation',
-                u'description': u'A visualization from a new computer model of foam behavior.',
-                u'uploader': u'Scientific American',
+                u'title': u'Le cours de bourse : l’analyse technique',
+                u'description': u'md5:7e9ad046e968cb2d1114004aba466fd9',
+                u'uploader': u'BFM BUSINESS',
             },
             u'params': {
                 u'skip_download': True,