[jwplatform] Improve embeds extraction (closes #25467)
[youtube-dl] / youtube_dl / extractor / jwplatform.py
index 63d0dc998cf1cf281dda3c27f3afaae84f4906c9..dfa07e42354b62044e8587e41c46163693d50107 100644 (file)
@@ -7,8 +7,8 @@ from .common import InfoExtractor
 
 
 class JWPlatformIE(InfoExtractor):
-    _VALID_URL = r'(?:https?://content\.jwplatform\.com/(?:feeds|players|jw6)/|jwplatform:)(?P<id>[a-zA-Z0-9]{8})'
-    _TEST = {
+    _VALID_URL = r'(?:https?://(?:content\.jwplatform|cdn\.jwplayer)\.com/(?:(?:feed|player|thumb|preview)s|jw6|v2/media)/|jwplatform:)(?P<id>[a-zA-Z0-9]{8})'
+    _TESTS = [{
         'url': 'http://content.jwplatform.com/players/nPripu9l-ALJ3XQCI.js',
         'md5': 'fa8899fa601eb7c83a64e9d568bdf325',
         'info_dict': {
@@ -19,7 +19,10 @@ class JWPlatformIE(InfoExtractor):
             'upload_date': '20081127',
             'timestamp': 1227796140,
         }
-    }
+    }, {
+        'url': 'https://cdn.jwplayer.com/players/nPripu9l-ALJ3XQCI.js',
+        'only_matching': True,
+    }]
 
     @staticmethod
     def _extract_url(webpage):
@@ -29,10 +32,10 @@ class JWPlatformIE(InfoExtractor):
     @staticmethod
     def _extract_urls(webpage):
         return re.findall(
-            r'<(?:script|iframe)[^>]+?src=["\']((?:https?:)?//content\.jwplatform\.com/players/[a-zA-Z0-9]{8})',
+            r'<(?:script|iframe)[^>]+?src=["\']((?:https?:)?//(?:content\.jwplatform|cdn\.jwplayer)\.com/players/[a-zA-Z0-9]{8})',
             webpage)
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
-        json_data = self._download_json('http://content.jwplatform.com/feeds/%s.json' % video_id, video_id)
+        json_data = self._download_json('https://cdn.jwplayer.com/v2/media/' + video_id, video_id)
         return self._parse_jwplayer_data(json_data, video_id)