[syfy] fix extraction(closes #9087)(closes #3820)(closes #2388)
author Remita Amine <remitamine@gmail.com>
Thu, 14 Jul 2016 22:59:12 +0000 (23:59 +0100)
committer Remita Amine <remitamine@gmail.com>
Thu, 14 Jul 2016 22:59:47 +0000 (23:59 +0100)
youtube_dl/extractor/syfy.py

index 5ca079f880717933a4216de6399046a44970d29b..53723b66eac2452cf9f99a913c0b072aa8b1362f 100644 (file)
@@ -1,46 +1,56 @@
 from __future__ import unicode_literals
 
-import re
+from .theplatform import ThePlatformIE
+from ..utils import (
+    update_url_query,
+    smuggle_url,
+)
 
-from .common import InfoExtractor
-
-
-class SyfyIE(InfoExtractor):
-    _VALID_URL = r'https?://www\.syfy\.com/(?:videos/.+?vid:(?P<id>[0-9]+)|(?!videos)(?P<video_name>[^/]+)(?:$|[?#]))'
 
+class SyfyIE(ThePlatformIE):
+    _VALID_URL = r'https?://www\.syfy\.com/(?:[^/]+/)?videos/(?P<id>[^/?#]+)'
     _TESTS = [{
-        'url': 'http://www.syfy.com/videos/Robot%20Combat%20League/Behind%20the%20Scenes/vid:2631458',
+        'url': 'http://www.syfy.com/theinternetruinedmylife/videos/the-internet-ruined-my-life-season-1-trailer',
         'info_dict': {
-            'id': 'NmqMrGnXvmO1',
-            'ext': 'flv',
-            'title': 'George Lucas has Advice for his Daughter',
-            'description': 'Listen to what insights George Lucas give his daughter Amanda.',
+            'id': '2968097',
+            'ext': 'mp4',
+            'title': 'The Internet Ruined My Life: Season 1 Trailer',
+            'description': 'One tweet, one post, one click, can destroy everything.',
+            'uploader': 'NBCU-MPAT',
+            'upload_date': '20170113',
+            'timestamp': 1484345640,
         },
-        'add_ie': ['ThePlatform'],
-    }, {
-        'url': 'http://www.syfy.com/wilwheaton',
-        'md5': '94dfa54ee3ccb63295b276da08c415f6',
-        'info_dict': {
-            'id': '4yoffOOXC767',
-            'ext': 'flv',
-            'title': 'The Wil Wheaton Project - Premiering May 27th at 10/9c.',
-            'description': 'The Wil Wheaton Project premieres May 27th at 10/9c. Don\'t miss it.',
+        'params': {
+            # m3u8 download
+            'skip_download': True,
         },
         'add_ie': ['ThePlatform'],
-        'skip': 'Blocked outside the US',
     }]
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_name = mobj.group('video_name')
-        if video_name:
-            generic_webpage = self._download_webpage(url, video_name)
-            video_id = self._search_regex(
-                r'<iframe.*?class="video_iframe_page"\s+src="/_utils/video/thP_video_controller.php.*?_vid([0-9]+)">',
-                generic_webpage, 'video ID')
-            url = 'http://www.syfy.com/videos/%s/%s/vid:%s' % (
-                video_name, video_name, video_id)
-        else:
-            video_id = mobj.group('id')
-        webpage = self._download_webpage(url, video_id)
-        return self.url_result(self._og_search_video_url(webpage))
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+        syfy_mpx = list(self._parse_json(self._search_regex(
+            r'jQuery\.extend\([^,]+,\s*({.+})\);', webpage, 'drupal settings'),
+            display_id)['syfy']['syfy_mpx'].values())[0]
+        video_id = syfy_mpx['mpxGUID']
+        title = syfy_mpx['episodeTitle']
+        query = {
+            'mbr': 'true',
+            'manifest': 'm3u',
+        }
+        if syfy_mpx.get('entitlement') == 'auth':
+            resource = '<rss version="2.0" xmlns:media="http://search.yahoo.com/mrss/"><channel><title>syfy</title><item><title><![CDATA[%s]]></title><guid>%s</guid><media:rating scheme="urn:v-chip">%s</media:rating></item></channel></rss>' % (title, video_id, syfy_mpx.get('mpxRating', 'TV-14'))
+            query['auth'] = self._extract_mvpd_auth(
+                url, video_id, 'syfy', resource)
+
+        return {
+            '_type': 'url_transparent',
+            'ie_key': 'ThePlatform',
+            'url': smuggle_url(update_url_query(
+                self._proto_relative_url(syfy_mpx['releaseURL']), query),
+                {'force_smil_url': True}),
+            'title': title,
+            'id': video_id,
+            'display_id': display_id,
+        }