[kaltura] Add _extract_url with fixed regex
author Sergey M․ <dstftw@gmail.com>
Mon, 27 Jun 2016 15:44:17 +0000 (22:44 +0700)
committer Sergey M․ <dstftw@gmail.com>
Mon, 27 Jun 2016 15:44:17 +0000 (22:44 +0700)
youtube_dl/extractor/kaltura.py

index a65697ff558864f36cc5e8b8f82f959b19ea16fc..c75a958ba8aab7e78274daf6ac2c0c4d47534d9b 100644 (file)
@@ -64,6 +64,32 @@ class KalturaIE(InfoExtractor):
         }
     ]
 
+    @staticmethod
+    def _extract_url(webpage):
+        """Locate a Kaltura embed in ``webpage`` and return a ``kaltura:`` URL.
+
+        Two embed styles are recognised, tried in this order:
+
+        1. A ``kWidget.embed({...})`` / ``kWidget.thumbEmbed({...})`` call
+           whose settings object has a quoted ``wid`` key (a leading ``_`` in
+           its value is dropped) followed by a quoted ``entry_id`` /
+           ``entryId`` key.
+        2. A quoted ``cdnapi[sec].kaltura.com`` URL that carries the partner
+           id as ``p/<digits>`` or ``partner_id/<digits>``, followed later in
+           the page by an ``entry_id`` / ``entryId`` key (quoted or bare).
+
+        Returns the string ``'kaltura:<partner_id>:<entry_id>'``, or ``None``
+        when neither pattern is found.
+        """
+        mobj = (
+            # Style 1: inline kWidget embed call.  The entry id value may be
+            # followed by a comma (more properties follow) or by the closing
+            # brace of the settings object (it is the last property).
+            re.search(
+                r"""(?xs)
+                    kWidget\.(?:thumb)?[Ee]mbed\(
+                    \{.*?
+                        (?P<q1>['\"])wid(?P=q1)\s*:\s*
+                        (?P<q2>['\"])_?(?P<partner_id>[^'\"]+)(?P=q2),.*?
+                        (?P<q3>['\"])entry_?[Ii]d(?P=q3)\s*:\s*
+                        (?P<q4>['\"])(?P<id>[^'\"]+)(?P=q4)(?:,|\s*\})
+                """, webpage) or
+            # Style 2: partner id taken from a cdnapi URL, entry id taken
+            # from a key that appears after that URL.
+            re.search(
+                r'''(?xs)
+                    (?P<q1>["\'])
+                        (?:https?:)?//cdnapi(?:sec)?\.kaltura\.com/.*?(?:p|partner_id)/(?P<partner_id>\d+).*?
+                    (?P=q1).*?
+                    (?:
+                        entry_?[Ii]d|
+                        (?P<q2>["\'])entry_?[Ii]d(?P=q2)
+                    )\s*:\s*
+                    (?P<q3>["\'])(?P<id>.+?)(?P=q3)
+                ''', webpage))
+        if not mobj:
+            return None
+        # groupdict() also holds the quote-capture groups; only partner_id
+        # and id are referenced by the format string.
+        return 'kaltura:%(partner_id)s:%(id)s' % mobj.groupdict()
+
     def _kaltura_api_call(self, video_id, actions, *args, **kwargs):
         params = actions[0]
         if len(actions) > 1: