[kaltura] add support for videos stored on custom Kaltura servers (closes #5557)
author Remita Amine <remitamine@gmail.com>
Mon, 4 Jul 2016 16:57:44 +0000 (17:57 +0100)
committer Remita Amine <remitamine@gmail.com>
Mon, 4 Jul 2016 16:59:58 +0000 (17:59 +0100)
youtube_dl/extractor/generic.py
youtube_dl/extractor/kaltura.py
youtube_dl/utils.py

index a9b61bf13cd364d96255401fa5ce812c8db219e1..764697bd29577c8b59b716c233e1a42baddb9b0c 100644 (file)
@@ -1295,6 +1295,21 @@ class GenericIE(InfoExtractor):
                 'uploader': 'cylus cyrus',
             },
         },
+        {
+            # video stored on custom kaltura server
+            'url': 'http://www.expansion.com/multimedia/videos.html?media=EQcM30NHIPv',
+            'md5': '537617d06e64dfed891fa1593c4b30cc',
+            'info_dict': {
+                'id': '0_1iotm5bh',
+                'ext': 'mp4',
+                'title': 'Elecciones británicas: 5 lecciones para Rajoy',
+                'description': 'md5:435a89d68b9760b92ce67ed227055f16',
+                'uploader_id': 'videos.expansion@el-mundo.net',
+                'upload_date': '20150429',
+                'timestamp': 1430303472,
+            },
+            'add_ie': ['Kaltura'],
+        },
     ]
 
     def report_following_redirect(self, new_url):
index c75a958ba8aab7e78274daf6ac2c0c4d47534d9b..147bb8cf08f836c70f4dc63ed83569010df1c4f7 100644 (file)
@@ -6,7 +6,6 @@ import base64
 
 from .common import InfoExtractor
 from ..compat import (
-    compat_urllib_parse_urlencode,
     compat_urlparse,
     compat_parse_qs,
 )
@@ -15,6 +14,7 @@ from ..utils import (
     ExtractorError,
     int_or_none,
     unsmuggle_url,
+    smuggle_url,
 )
 
 
@@ -34,7 +34,8 @@ class KalturaIE(InfoExtractor):
                         )(?:/(?P<path>[^?]+))?(?:\?(?P<query>.*))?
                 )
                 '''
-    _API_BASE = 'http://cdnapi.kaltura.com/api_v3/index.php?'
+    _SERVICE_URL = 'http://cdnapi.kaltura.com'
+    _SERVICE_BASE = '/api_v3/index.php'
     _TESTS = [
         {
             'url': 'kaltura:269692:1_1jc2y3e4',
@@ -88,7 +89,14 @@ class KalturaIE(InfoExtractor):
                     (?P<q3>["\'])(?P<id>.+?)(?P=q3)
                 ''', webpage))
         if mobj:
-            return 'kaltura:%(partner_id)s:%(id)s' % mobj.groupdict()
+            embed_info = mobj.groupdict()
+            url = 'kaltura:%(partner_id)s:%(id)s' % embed_info
+            service_url = re.search(
+                '<script[^>]+src=(?:["\'])((?:https?:)?//.+?)/p/%(partner_id)s/sp/%(partner_id)s00/embedIframeJs' % embed_info,
+                webpage)
+            if service_url:
+                url = smuggle_url(url, {'service_url': service_url.group(1)})
+            return url
 
     def _kaltura_api_call(self, video_id, actions, *args, **kwargs):
         params = actions[0]
@@ -97,9 +105,9 @@ class KalturaIE(InfoExtractor):
                 for k, v in a.items():
                     params['%d:%s' % (i, k)] = v
 
-        query = compat_urllib_parse_urlencode(params)
-        url = self._API_BASE + query
-        data = self._download_json(url, video_id, *args, **kwargs)
+        data = self._download_json(
+            self._SERVICE_URL + self._SERVICE_BASE,
+            video_id, query=params, *args, **kwargs)
 
         status = data if len(actions) == 1 else data[0]
         if status.get('objectType') == 'KalturaAPIException':
@@ -148,6 +156,9 @@ class KalturaIE(InfoExtractor):
 
     def _real_extract(self, url):
         url, smuggled_data = unsmuggle_url(url, {})
+        service_url = smuggled_data.get('service_url')
+        if service_url:
+            self._SERVICE_URL = service_url
 
         mobj = re.match(self._VALID_URL, url)
         partner_id, entry_id = mobj.group('partner_id', 'id')
@@ -201,12 +212,17 @@ class KalturaIE(InfoExtractor):
                 unsigned_url += '?referrer=%s' % referrer
             return unsigned_url
 
+        data_url = info['dataUrl']
+        if '/flvclipper/' in data_url:
+            data_url = re.sub(r'/flvclipper/.*', '/serveFlavor', data_url)
+
         formats = []
         for f in flavor_assets:
             # Continue if asset is not ready
             if f['status'] != 2:
                 continue
-            video_url = sign_url('%s/flavorId/%s' % (info['dataUrl'], f['id']))
+            video_url = sign_url(
+                '%s/flavorId/%s' % (data_url, f['id']))
             formats.append({
                 'format_id': '%(fileExt)s-%(bitrate)s' % f,
                 'ext': f.get('fileExt'),
@@ -219,9 +235,12 @@ class KalturaIE(InfoExtractor):
                 'width': int_or_none(f.get('width')),
                 'url': video_url,
             })
-        m3u8_url = sign_url(info['dataUrl'].replace('format/url', 'format/applehttp'))
-        formats.extend(self._extract_m3u8_formats(
-            m3u8_url, entry_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
+        if '/playManifest/' in data_url:
+            m3u8_url = sign_url(data_url.replace(
+                'format/url', 'format/applehttp'))
+            formats.extend(self._extract_m3u8_formats(
+                m3u8_url, entry_id, 'mp4', 'm3u8_native',
+                m3u8_id='hls', fatal=False))
 
         self._check_formats(formats, entry_id)
         self._sort_formats(formats)
index 495878a0e5286c37828406a2140043bd6433d4fe..d302f39e471f2273e8aadfff93069560c7c8adf8 100644 (file)
@@ -1444,6 +1444,8 @@ def shell_quote(args):
 def smuggle_url(url, data):
     """ Pass additional data in a URL for internal use. """
 
+    url, idata = unsmuggle_url(url, {})
+    data.update(idata)
     sdata = compat_urllib_parse_urlencode(
         {'__youtubedl_smuggle': json.dumps(data)})
     return url + '#' + sdata