[showroomlive] Improve (closes #11458)
author Sergey M․ <dstftw@gmail.com>
Thu, 29 Dec 2016 17:12:35 +0000 (00:12 +0700)
committer Sergey M․ <dstftw@gmail.com>
Thu, 29 Dec 2016 17:12:35 +0000 (00:12 +0700)
youtube_dl/extractor/showroomlive.py

index 8bfae510beb82d1509649d2b626af9424797b962..efd9d561ffcf24bc922fd65f0eef0f4aff5ba091 100644 (file)
@@ -2,79 +2,83 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
-from ..utils import ExtractorError, compat_urlparse
+from ..compat import compat_str
+from ..utils import (
+    ExtractorError,
+    int_or_none,
+    urljoin,
+)
 
 
-class ShowroomLiveIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?showroom-live\.com/(?P<id>[0-9a-zA-Z_]+)'
+class ShowRoomLiveIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?showroom-live\.com/(?!onlive|timetable|event|campaign|news|ranking|room)(?P<id>[^/?#&]+)'
     _TEST = {
         'url': 'https://www.showroom-live.com/48_Nana_Okada',
-        'skip': 'Only live broadcasts, can\'t predict test case.',
-        'info_dict': {
-            'id': '48_Nana_Okada',
-            'ext': 'mp4',
-            'uploader_id': '48_Nana_Okada',
-        }
+        'only_matching': True,
     }
 
     def _real_extract(self, url):
         broadcaster_id = self._match_id(url)
 
-        # There is no showroom on these pages.
-        if broadcaster_id in ['onlive', 'timetable', 'event', 'campaign', 'news', 'ranking']:
-            raise ExtractorError('URL %s does not contain a showroom' % url)
-
-        # Retrieve the information we need
         webpage = self._download_webpage(url, broadcaster_id)
-        room_id = self._search_regex(r'profile\?room_id\=(\d+)', webpage, 'room_id')
-        room_url = compat_urlparse.urljoin(url, "/api/room/profile?room_id=%s") % room_id
-        room = self._download_json(room_url, broadcaster_id)
 
-        is_live = room.get('is_onlive')
-        if not is_live:
-            raise ExtractorError('%s their showroom is not live' % broadcaster_id)
+        room_id = self._search_regex(
+            (r'SrGlobal\.roomId\s*=\s*(\d+)',
+             r'(?:profile|room)\?room_id\=(\d+)'), webpage, 'room_id')
 
-        # Prepare and return the information
-        uploader = room.get('performer_name') or broadcaster_id  # performer_name can be an empty string.
-        title = room.get('room_name', room.get('main_name', "%s's Showroom" % uploader))
+        room = self._download_json(
+            urljoin(url, '/api/room/profile?room_id=%s' % room_id),
+            broadcaster_id)
 
-        return {
-            'is_live': is_live,
-            'id': str(room.get('live_id')),
-            'timestamp': room.get('current_live_started_at'),
-            'uploader': uploader,
-            'uploader_id': broadcaster_id,
-            'title': title,
-            'description': room.get('description'),
-            'formats': self._extract_formats(url, broadcaster_id, room_id)
-        }
+        is_live = room.get('is_onlive')
+        if is_live is not True:
+            raise ExtractorError('%s is offline' % broadcaster_id, expected=True)
 
-    def _extract_formats(self, url, broadcaster_id, room_id):
-        formats = []
+        uploader = room.get('performer_name') or broadcaster_id
+        title = room.get('room_name') or room.get('main_name') or uploader
 
-        stream_url = compat_urlparse.urljoin(url, "/api/live/streaming_url?room_id=%s") % room_id
-        streaming_url_list = self._download_json(stream_url, broadcaster_id).get('streaming_url_list', [])
+        streaming_url_list = self._download_json(
+            urljoin(url, '/api/live/streaming_url?room_id=%s' % room_id),
+            broadcaster_id)['streaming_url_list']
 
+        formats = []
         for stream in streaming_url_list:
-            if stream.get('type') == "hls":
-                formats.extend(self._extract_m3u8_formats(
-                    stream.get('url'),
-                    broadcaster_id,
-                    ext='mp4',
-                    m3u8_id='hls',
-                    preference=stream.get('quality', 100),
-                    live=True
-                ))
-            elif stream.get('type') == 'rtmp':
-                url = stream.get('url') + '/' + stream.get('stream_name')
+            stream_url = stream.get('url')
+            if not stream_url:
+                continue
+            stream_type = stream.get('type')
+            if stream_type == 'hls':
+                m3u8_formats = self._extract_m3u8_formats(
+                    stream_url, broadcaster_id, ext='mp4', m3u8_id='hls',
+                    live=True)
+                for f in m3u8_formats:
+                    f['quality'] = int_or_none(stream.get('quality', 100))
+                formats.extend(m3u8_formats)
+            elif stream_type == 'rtmp':
+                stream_name = stream.get('stream_name')
+                if not stream_name:
+                    continue
                 formats.append({
-                    'url': url,
-                    'format_id': 'rtmp',
-                    'protocol': 'rtmp',
+                    'url': stream_url,
+                    'play_path': stream_name,
+                    'page_url': url,
+                    'player_url': 'https://www.showroom-live.com/assets/swf/v3/ShowRoomLive.swf',
+                    'rtmp_live': True,
                     'ext': 'flv',
-                    'preference': stream.get('quality', 100),
-                    'format_note': stream.get('label')
+                    'format_id': 'rtmp',
+                    'format_note': stream.get('label'),
+                    'quality': int_or_none(stream.get('quality', 100)),
                 })
-
         self._sort_formats(formats)
-        return formats
+
+        return {
+            'id': compat_str(room.get('live_id') or broadcaster_id),
+            'title': self._live_title(title),
+            'description': room.get('description'),
+            'timestamp': int_or_none(room.get('current_live_started_at')),
+            'uploader': uploader,
+            'uploader_id': broadcaster_id,
+            'view_count': int_or_none(room.get('view_num')),
+            'formats': formats,
+            'is_live': True,
+        }