Unify coding cookie
[youtube-dl] / youtube_dl / extractor / screenwavemedia.py
index d1ab66b3216d5153a5480769fb0723919f3fdb37..7d77e8825d7420185ed1f9f707efb3462f19cab1 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re
@@ -7,12 +7,13 @@ from .common import InfoExtractor
 from ..utils import (
     int_or_none,
     unified_strdate,
+    js_to_json,
 )
 
 
 class ScreenwaveMediaIE(InfoExtractor):
-    _VALID_URL = r'http://player\d?\.screenwavemedia\.com/(?:play/)?[a-zA-Z]+\.php\?[^"]*\bid=(?P<id>.+)'
-
+    _VALID_URL = r'(?:https?:)?//player\d?\.screenwavemedia\.com/(?:play/)?[a-zA-Z]+\.php\?.*\bid=(?P<id>[A-Za-z0-9-]+)'
+    EMBED_PATTERN = r'src=(["\'])(?P<url>(?:https?:)?//player\d?\.screenwavemedia\.com/(?:play/)?[a-zA-Z]+\.php\?.*\bid=.+?)\1'
     _TESTS = [{
         'url': 'http://player.screenwavemedia.com/play/play.php?playerdiv=videoarea&companiondiv=squareAd&id=Cinemassacre-19911',
         'only_matching': True,
@@ -22,60 +23,74 @@ class ScreenwaveMediaIE(InfoExtractor):
         video_id = self._match_id(url)
 
         playerdata = self._download_webpage(
-            'http://player.screenwavemedia.com/play/player.php?id=%s' % video_id,
+            'http://player.screenwavemedia.com/player.php?id=%s' % video_id,
             video_id, 'Downloading player webpage')
 
         vidtitle = self._search_regex(
             r'\'vidtitle\'\s*:\s*"([^"]+)"', playerdata, 'vidtitle').replace('\\/', '/')
-        vidurl = self._search_regex(
-            r'\'vidurl\'\s*:\s*"([^"]+)"', playerdata, 'vidurl').replace('\\/', '/')
-
-        videolist_url = None
-
-        mobj = re.search(r"'videoserver'\s*:\s*'(?P<videoserver>[^']+)'", playerdata)
-        if mobj:
-            videoserver = mobj.group('videoserver')
-            mobj = re.search(r'\'vidid\'\s*:\s*"(?P<vidid>[^\']+)"', playerdata)
-            vidid = mobj.group('vidid') if mobj else video_id
-            videolist_url = 'http://%s/vod/smil:%s.smil/jwplayer.smil' % (videoserver, vidid)
-        else:
-            mobj = re.search(r"file\s*:\s*'(?P<smil>http.+?/jwplayer\.smil)'", playerdata)
-            if mobj:
-                videolist_url = mobj.group('smil')
-
-        if videolist_url:
-            videolist = self._download_xml(videolist_url, video_id, 'Downloading videolist XML')
-            formats = []
-            baseurl = vidurl[:vidurl.rfind('/') + 1]
-            for video in videolist.findall('.//video'):
-                src = video.get('src')
-                if not src:
+
+        playerconfig = self._download_webpage(
+            'http://player.screenwavemedia.com/player.js',
+            video_id, 'Downloading playerconfig webpage')
+
+        videoserver = self._search_regex(r'SWMServer\s*=\s*"([\d\.]+)"', playerdata, 'videoserver')
+
+        sources = self._parse_json(
+            js_to_json(
+                re.sub(
+                    r'(?s)/\*.*?\*/', '',
+                    self._search_regex(
+                        r'sources\s*:\s*(\[[^\]]+?\])', playerconfig,
+                        'sources',
+                    ).replace(
+                        "' + thisObj.options.videoserver + '",
+                        videoserver
+                    ).replace(
+                        "' + playerVidId + '",
+                        video_id
+                    )
+                )
+            ),
+            video_id, fatal=False
+        )
+
+        # Fallback to hardcoded sources if JS changes again
+        if not sources:
+            self.report_warning('Falling back to a hardcoded list of streams')
+            sources = [{
+                'file': 'http://%s/vod/%s_%s.mp4' % (videoserver, video_id, format_id),
+                'type': 'mp4',
+                'label': format_label,
+            } for format_id, format_label in (
+                ('low', '144p Low'), ('med', '160p Med'), ('high', '360p High'), ('hd1', '720p HD1'))]
+            sources.append({
+                'file': 'http://%s/vod/smil:%s.smil/playlist.m3u8' % (videoserver, video_id),
+                'type': 'hls',
+            })
+
+        formats = []
+        for source in sources:
+            file_ = source.get('file')
+            if not file_:
+                continue
+            if source.get('type') == 'hls':
+                formats.extend(self._extract_m3u8_formats(file_, video_id, ext='mp4'))
+            else:
+                format_id = self._search_regex(
+                    r'_(.+?)\.[^.]+$', file_, 'format id', default=None)
+                if not self._is_valid_url(file_, video_id, format_id or 'video'):
                     continue
-                file_ = src.partition(':')[-1]
-                width = int_or_none(video.get('width'))
-                height = int_or_none(video.get('height'))
-                bitrate = int_or_none(video.get('system-bitrate'), scale=1000)
-                format = {
-                    'url': baseurl + file_,
-                    'format_id': src.rpartition('.')[0].rpartition('_')[-1],
-                }
-                if width or height:
-                    format.update({
-                        'tbr': bitrate,
-                        'width': width,
-                        'height': height,
-                    })
-                else:
-                    format.update({
-                        'abr': bitrate,
-                        'vcodec': 'none',
-                    })
-                formats.append(format)
-        else:
-            formats = [{
-                'url': vidurl,
-            }]
-        self._sort_formats(formats)
+                format_label = source.get('label')
+                height = int_or_none(self._search_regex(
+                    r'^(\d+)[pP]', format_label, 'height', default=None))
+                formats.append({
+                    'url': file_,
+                    'format_id': format_id,
+                    'format': format_label,
+                    'ext': source.get('type'),
+                    'height': height,
+                })
+        self._sort_formats(formats, field_preference=('height', 'width', 'tbr', 'format_id'))
 
         return {
             'id': video_id,
@@ -94,7 +109,11 @@ class TeamFourIE(InfoExtractor):
             'upload_date': '20130401',
             'description': 'Check out this and more on our website: http://teamfourstar.com\nTFS Store: http://sharkrobot.com/team-four-star\nFollow on Twitter: http://twitter.com/teamfourstar\nLike on FB: http://facebook.com/teamfourstar',
             'title': 'A Moment With TFS Episode 4',
-        }
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
     }
 
     def _real_extract(self, url):