Merge remote-tracking branch 'rzhxeo/crunchyroll'
[youtube-dl] / youtube_dl / extractor / brightcove.py
index 0c6e13b9cbc33eb7ffecb95b9b84461c3bf5ec2c..b1b7526ca98f03cd44707a0f966567a1a0d363b8 100644 (file)
@@ -10,10 +10,12 @@ from ..utils import (
     find_xpath_attr,
     compat_urlparse,
     compat_str,
+    compat_urllib_request,
 
     ExtractorError,
 )
 
+
 class BrightcoveIE(InfoExtractor):
     _VALID_URL = r'https?://.*brightcove\.com/(services|viewer).*\?(?P<query>.*)'
     _FEDERATED_URL_TEMPLATE = 'http://c.brightcove.com/services/viewer/htmlFederated?%s'
@@ -53,6 +55,18 @@ class BrightcoveIE(InfoExtractor):
                 u'uploader': u'Mashable',
             },
         },
+        {
+            # test that the default referer works
+            # from http://national.ballet.ca/interact/video/Lost_in_Motion_II/
+            u'url': u'http://link.brightcove.com/services/player/bcpid756015033001?bckey=AQ~~,AAAApYJi_Ck~,GxhXCegT1Dp39ilhXuxMJxasUhVNZiil&bctid=2878862109001',
+            u'info_dict': {
+                u'id': u'2878862109001',
+                u'ext': u'mp4',
+                u'title': u'Lost in Motion II',
+                u'description': u'md5:363109c02998fee92ec02211bd8000df',
+                u'uploader': u'National Ballet of Canada',
+            },
+        },
     ]
 
     @classmethod
@@ -73,13 +87,22 @@ class BrightcoveIE(InfoExtractor):
         params = {'flashID': object_doc.attrib['id'],
                   'playerID': find_xpath_attr(object_doc, './param', 'name', 'playerID').attrib['value'],
                   }
-        playerKey = find_xpath_attr(object_doc, './param', 'name', 'playerKey')
+        def find_param(name):
+            node = find_xpath_attr(object_doc, './param', 'name', name)
+            if node is not None:
+                return node.attrib['value']
+            return None
+        playerKey = find_param('playerKey')
         # Not all pages define this value
         if playerKey is not None:
-            params['playerKey'] = playerKey.attrib['value']
-        videoPlayer = find_xpath_attr(object_doc, './param', 'name', '@videoPlayer')
+            params['playerKey'] = playerKey
+        # Any one of these three fields may hold the id of the video
+        videoPlayer = find_param('@videoPlayer') or find_param('videoId') or find_param('videoID')
         if videoPlayer is not None:
-            params['@videoPlayer'] = videoPlayer.attrib['value']
+            params['@videoPlayer'] = videoPlayer
+        linkBase = find_param('linkBaseURL')
+        if linkBase is not None:
+            params['linkBaseURL'] = linkBase
         data = compat_urllib_parse.urlencode(params)
         return cls._FEDERATED_URL_TEMPLATE % data
 
@@ -97,22 +120,32 @@ class BrightcoveIE(InfoExtractor):
             return None
 
     def _real_extract(self, url):
-        # Change the 'videoId' or 'videoID' field to '@videoPlayer'
-        url = re.sub(r'(?<=[?&])videoI(d|D)', '%40videoPlayer', url)
+        # Change the 'videoId', 'videoID' and 'bctid' fields to '@videoPlayer'
+        url = re.sub(r'(?<=[?&])(videoI(d|D)|bctid)', '%40videoPlayer', url)
+        # Change bckey (used by bcove.me urls) to playerKey
+        url = re.sub(r'(?<=[?&])bckey', 'playerKey', url)
         mobj = re.match(self._VALID_URL, url)
         query_str = mobj.group('query')
         query = compat_urlparse.parse_qs(query_str)
 
         videoPlayer = query.get('@videoPlayer')
         if videoPlayer:
-            return self._get_video_info(videoPlayer[0], query_str)
+            return self._get_video_info(videoPlayer[0], query_str, query,
+                # We set the original url as the default 'Referer' header
+                referer=url)
         else:
             player_key = query['playerKey']
             return self._get_playlist_info(player_key[0])
 
-    def _get_video_info(self, video_id, query):
-        request_url = self._FEDERATED_URL_TEMPLATE % query
-        webpage = self._download_webpage(request_url, video_id)
+    def _get_video_info(self, video_id, query_str, query, referer=None):
+        request_url = self._FEDERATED_URL_TEMPLATE % query_str
+        req = compat_urllib_request.Request(request_url)
+        linkBase = query.get('linkBaseURL')
+        if linkBase is not None:
+            referer = linkBase[0]
+        if referer is not None:
+            req.add_header('Referer', referer)
+        webpage = self._download_webpage(req, video_id)
 
         self.report_extraction(video_id)
         info = self._search_regex(r'var experienceJSON = ({.*?});', webpage, 'json')
@@ -146,10 +179,11 @@ class BrightcoveIE(InfoExtractor):
         renditions = video_info.get('renditions')
         if renditions:
             renditions = sorted(renditions, key=lambda r: r['size'])
-            best_format = renditions[-1]
-            info.update({
-                'url': best_format['defaultURL'],
-            })
+            info['formats'] = [{
+                'url': rend['defaultURL'],
+                'height': rend.get('frameHeight'),
+                'width': rend.get('frameWidth'),
+            } for rend in renditions]
         elif video_info.get('FLVFullLengthURL') is not None:
             info.update({
                 'url': video_info['FLVFullLengthURL'],