[makertv] improve extraction
author remitamine <remitamine@gmail.com>
Mon, 21 Dec 2015 03:24:58 +0000 (04:24 +0100)
committer remitamine <remitamine@gmail.com>
Mon, 21 Dec 2015 03:24:58 +0000 (04:24 +0100)
youtube_dl/extractor/generic.py
youtube_dl/extractor/jwplatform.py
youtube_dl/extractor/makertv.py

index 4d38b0c9d1144383b2c2840d8fd9b91f88dadb91..f5dd88f5444f78c860340546eaedcc0ddea9fc0f 100644 (file)
@@ -53,6 +53,7 @@ from .onionstudios import OnionStudiosIE
 from .snagfilms import SnagFilmsEmbedIE
 from .screenwavemedia import ScreenwaveMediaIE
 from .mtv import MTVServicesEmbeddedIE
+from .jwplatform import JWPlatformIE
 
 
 class GenericIE(InfoExtractor):
@@ -1787,6 +1788,11 @@ class GenericIE(InfoExtractor):
         if snagfilms_url:
             return self.url_result(snagfilms_url)
 
+        # Look for JWPlatform embeds
+        jwplatform_url = JWPlatformIE._extract_url(webpage)
+        if jwplatform_url:
+            return self.url_result(jwplatform_url, 'JWPlatform')
+
         # Look for ScreenwaveMedia embeds
         mobj = re.search(ScreenwaveMediaIE.EMBED_PATTERN, webpage)
         if mobj is not None:
index 3a3dc439ac4e8dba0b56672fad36511b5e41d5d3..cdc095a79aa6ff8ccf0404293b9011a1f5d4d780 100644 (file)
@@ -1,6 +1,8 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
+import re
+
 from .common import InfoExtractor
 from ..utils import int_or_none
 
@@ -23,7 +25,7 @@ class JWPlatformIE(InfoExtractor):
     @staticmethod
     def _extract_url(webpage):
         mobj = re.search(
-            r'<script[^>]+?src=["\'](?P<url>(?:https?:)?//content.jwplatform.com/players/[a-zA-Z0-9]{8}',
+            r'<script[^>]+?src=["\'](?P<url>(?:https?:)?//content.jwplatform.com/players/[a-zA-Z0-9]{8})',
             webpage)
         if mobj:
             return mobj.group('url')
@@ -42,7 +44,9 @@ class JWPlatformIE(InfoExtractor):
             source_url = self._proto_relative_url(source['file'])
             source_type = source.get('type') or ''
             if source_type == 'application/vnd.apple.mpegurl':
-                formats.extend(self._extract_m3u8_formats(source_url, video_id, 'mp4', 'm3u8_native', fatal=None))
+                m3u8_formats = self._extract_m3u8_formats(source_url, video_id, 'mp4', 'm3u8_native', fatal=None)
+                if m3u8_formats:
+                    formats.extend(m3u8_formats)
             elif source_type.startswith('audio'):
                 formats.append({
                     'url': source_url,
@@ -57,7 +61,7 @@ class JWPlatformIE(InfoExtractor):
         self._sort_formats(formats)
 
         return {
-            'id': video_data['mediaid'],
+            'id': video_id,
             'title': video_data['title'],
             'description': video_data.get('description'),
             'thumbnail': self._proto_relative_url(video_data.get('image')),
index 0256e4e243f660cfb97fa50ee87a3716d911b5ef..3c34d4604f20699d99937b29a11aad7f8f4116a4 100644 (file)
@@ -5,12 +5,12 @@ from .common import InfoExtractor
 
 
 class MakerTVIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:(?:www\.)?maker\.tv/(?:[^/]+/)?video|http://makerplayer.com/embed/maker)/(?P<id>[a-zA-Z0-9]{12})'
+    _VALID_URL = r'https?://(?:(?:www\.)?maker\.tv/(?:[^/]+/)*video|makerplayer.com/embed/maker)/(?P<id>[a-zA-Z0-9]{12})'
     _TEST = {
         'url': 'http://www.maker.tv/video/Fh3QgymL9gsc',
         'md5': 'ca237a53a8eb20b6dc5bd60564d4ab3e',
         'info_dict': {
-            'id': 'brOEcGut',
+            'id': 'Fh3QgymL9gsc',
             'ext': 'mp4',
             'title': 'Maze Runner: The Scorch Trials Official Movie Review',
             'description': 'md5:11ff3362d7ef1d679fdb649f6413975a',
@@ -22,6 +22,11 @@ class MakerTVIE(InfoExtractor):
     def _real_extract(self, url):
         video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
-        jwplatform_id = self._search_regex([r'jwid="([^"]+)"', r'Maker.jw_id\s*=\s*"([^"]+)";'], webpage, 'jwplatform id')
+        jwplatform_id = self._search_regex(r'jw_?id="([^"]+)"', webpage, 'jwplatform id')
 
-        return self.url_result('jwplatform:%s' % jwplatform_id, 'JWPlatform')
+        return {
+            '_type': 'url_transparent',
+            'id': video_id,
+            'url': 'jwplatform:%s' % jwplatform_id,
+            'ie_key': 'JWPlatform',
+        }