[kaltura] add support for videos stored on custom Kaltura servers (closes #5557)
[youtube-dl] / youtube_dl / extractor / kaltura.py
index c75a958ba8aab7e78274daf6ac2c0c4d47534d9b..147bb8cf08f836c70f4dc63ed83569010df1c4f7 100644 (file)
@@ -6,7 +6,6 @@ import base64
 
 from .common import InfoExtractor
 from ..compat import (
-    compat_urllib_parse_urlencode,
     compat_urlparse,
     compat_parse_qs,
 )
@@ -15,6 +14,7 @@ from ..utils import (
     ExtractorError,
     int_or_none,
     unsmuggle_url,
+    smuggle_url,
 )
 
 
@@ -34,7 +34,8 @@ class KalturaIE(InfoExtractor):
                         )(?:/(?P<path>[^?]+))?(?:\?(?P<query>.*))?
                 )
                 '''
-    _API_BASE = 'http://cdnapi.kaltura.com/api_v3/index.php?'
+    _SERVICE_URL = 'http://cdnapi.kaltura.com'
+    _SERVICE_BASE = '/api_v3/index.php'
     _TESTS = [
         {
             'url': 'kaltura:269692:1_1jc2y3e4',
@@ -88,7 +89,14 @@ class KalturaIE(InfoExtractor):
                     (?P<q3>["\'])(?P<id>.+?)(?P=q3)
                 ''', webpage))
         if mobj:
-            return 'kaltura:%(partner_id)s:%(id)s' % mobj.groupdict()
+            embed_info = mobj.groupdict()
+            url = 'kaltura:%(partner_id)s:%(id)s' % embed_info
+            service_url = re.search(
+                '<script[^>]+src=(?:["\'])((?:https?:)?//.+?)/p/%(partner_id)s/sp/%(partner_id)s00/embedIframeJs' % embed_info,
+                webpage)
+            if service_url:
+                url = smuggle_url(url, {'service_url': service_url.group(1)})
+            return url
 
     def _kaltura_api_call(self, video_id, actions, *args, **kwargs):
         params = actions[0]
@@ -97,9 +105,9 @@ class KalturaIE(InfoExtractor):
                 for k, v in a.items():
                     params['%d:%s' % (i, k)] = v
 
-        query = compat_urllib_parse_urlencode(params)
-        url = self._API_BASE + query
-        data = self._download_json(url, video_id, *args, **kwargs)
+        data = self._download_json(
+            self._SERVICE_URL + self._SERVICE_BASE,
+            video_id, query=params, *args, **kwargs)
 
         status = data if len(actions) == 1 else data[0]
         if status.get('objectType') == 'KalturaAPIException':
@@ -148,6 +156,9 @@ class KalturaIE(InfoExtractor):
 
     def _real_extract(self, url):
         url, smuggled_data = unsmuggle_url(url, {})
+        service_url = smuggled_data.get('service_url')
+        if service_url:
+            self._SERVICE_URL = service_url
 
         mobj = re.match(self._VALID_URL, url)
         partner_id, entry_id = mobj.group('partner_id', 'id')
@@ -201,12 +212,17 @@ class KalturaIE(InfoExtractor):
                 unsigned_url += '?referrer=%s' % referrer
             return unsigned_url
 
+        data_url = info['dataUrl']
+        if '/flvclipper/' in data_url:
+            data_url = re.sub(r'/flvclipper/.*', '/serveFlavor', data_url)
+
         formats = []
         for f in flavor_assets:
             # Continue if asset is not ready
             if f['status'] != 2:
                 continue
-            video_url = sign_url('%s/flavorId/%s' % (info['dataUrl'], f['id']))
+            video_url = sign_url(
+                '%s/flavorId/%s' % (data_url, f['id']))
             formats.append({
                 'format_id': '%(fileExt)s-%(bitrate)s' % f,
                 'ext': f.get('fileExt'),
@@ -219,9 +235,12 @@ class KalturaIE(InfoExtractor):
                 'width': int_or_none(f.get('width')),
                 'url': video_url,
             })
-        m3u8_url = sign_url(info['dataUrl'].replace('format/url', 'format/applehttp'))
-        formats.extend(self._extract_m3u8_formats(
-            m3u8_url, entry_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
+        if '/playManifest/' in data_url:
+            m3u8_url = sign_url(data_url.replace(
+                'format/url', 'format/applehttp'))
+            formats.extend(self._extract_m3u8_formats(
+                m3u8_url, entry_id, 'mp4', 'm3u8_native',
+                m3u8_id='hls', fatal=False))
 
         self._check_formats(formats, entry_id)
         self._sort_formats(formats)