Merge remote-tracking branch 'rbrito/swap-dimensions'
[youtube-dl] / youtube_dl / extractor / brightcove.py
index b3c3dc0fda124f74f003e0227b5e24e4d301b468..66fe0ac9ade6fad80d77f0429c136c2d022af16d 100644 (file)
@@ -10,10 +10,12 @@ from ..utils import (
     find_xpath_attr,
     compat_urlparse,
     compat_str,
+    compat_urllib_request,
 
     ExtractorError,
 )
 
+
 class BrightcoveIE(InfoExtractor):
     _VALID_URL = r'https?://.*brightcove\.com/(services|viewer).*\?(?P<query>.*)'
     _FEDERATED_URL_TEMPLATE = 'http://c.brightcove.com/services/viewer/htmlFederated?%s'
@@ -42,6 +44,17 @@ class BrightcoveIE(InfoExtractor):
                 u'uploader': u'Oracle',
             },
         },
+        {
+            # From http://mashable.com/2013/10/26/thermoelectric-bracelet-lets-you-control-your-body-temperature/
+            u'url': u'http://c.brightcove.com/services/viewer/federated_f9?&playerID=1265504713001&publisherID=AQ%7E%7E%2CAAABBzUwv1E%7E%2CxP-xFHVUstiMFlNYfvF4G9yFnNaqCw_9&videoID=2750934548001',
+            u'info_dict': {
+                u'id': u'2750934548001',
+                u'ext': u'mp4',
+                u'title': u'This Bracelet Acts as a Personal Thermostat',
+                u'description': u'md5:547b78c64f4112766ccf4e151c20b6a0',
+                u'uploader': u'Mashable',
+            },
+        },
     ]
 
     @classmethod
@@ -62,13 +75,22 @@ class BrightcoveIE(InfoExtractor):
         params = {'flashID': object_doc.attrib['id'],
                   'playerID': find_xpath_attr(object_doc, './param', 'name', 'playerID').attrib['value'],
                   }
-        playerKey = find_xpath_attr(object_doc, './param', 'name', 'playerKey')
+        def find_param(name):
+            node = find_xpath_attr(object_doc, './param', 'name', name)
+            if node is not None:
+                return node.attrib['value']
+            return None
+        playerKey = find_param('playerKey')
         # Not all pages define this value
         if playerKey is not None:
-            params['playerKey'] = playerKey.attrib['value']
-        videoPlayer = find_xpath_attr(object_doc, './param', 'name', '@videoPlayer')
+            params['playerKey'] = playerKey
+        # The three fields hold the id of the video
+        videoPlayer = find_param('@videoPlayer') or find_param('videoId') or find_param('videoID')
         if videoPlayer is not None:
-            params['@videoPlayer'] = videoPlayer.attrib['value']
+            params['@videoPlayer'] = videoPlayer
+        linkBase = find_param('linkBaseURL')
+        if linkBase is not None:
+            params['linkBaseURL'] = linkBase
         data = compat_urllib_parse.urlencode(params)
         return cls._FEDERATED_URL_TEMPLATE % data
 
@@ -86,20 +108,28 @@ class BrightcoveIE(InfoExtractor):
             return None
 
     def _real_extract(self, url):
+        # Change the 'videoId' and others field to '@videoPlayer'
+        url = re.sub(r'(?<=[?&])(videoI(d|D)|bctid)', '%40videoPlayer', url)
+        # Change bckey (used by bcove.me urls) to playerKey
+        url = re.sub(r'(?<=[?&])bckey', 'playerKey', url)
         mobj = re.match(self._VALID_URL, url)
         query_str = mobj.group('query')
         query = compat_urlparse.parse_qs(query_str)
 
         videoPlayer = query.get('@videoPlayer')
         if videoPlayer:
-            return self._get_video_info(videoPlayer[0], query_str)
+            return self._get_video_info(videoPlayer[0], query_str, query)
         else:
             player_key = query['playerKey']
             return self._get_playlist_info(player_key[0])
 
-    def _get_video_info(self, video_id, query):
-        request_url = self._FEDERATED_URL_TEMPLATE % query
-        webpage = self._download_webpage(request_url, video_id)
+    def _get_video_info(self, video_id, query_str, query):
+        request_url = self._FEDERATED_URL_TEMPLATE % query_str
+        req = compat_urllib_request.Request(request_url)
+        linkBase = query.get('linkBaseURL')
+        if linkBase is not None:
+            req.add_header('Referer', linkBase[0])
+        webpage = self._download_webpage(req, video_id)
 
         self.report_extraction(video_id)
         info = self._search_regex(r'var experienceJSON = ({.*?});', webpage, 'json')
@@ -133,10 +163,11 @@ class BrightcoveIE(InfoExtractor):
         renditions = video_info.get('renditions')
         if renditions:
             renditions = sorted(renditions, key=lambda r: r['size'])
-            best_format = renditions[-1]
-            info.update({
-                'url': best_format['defaultURL'],
-            })
+            info['formats'] = [{
+                'url': rend['defaultURL'],
+                'height': rend.get('frameHeight'),
+                'width': rend.get('frameWidth'),
+            } for rend in renditions]
         elif video_info.get('FLVFullLengthURL') is not None:
             info.update({
                 'url': video_info['FLVFullLengthURL'],