[showroomlive] Add extractor
[youtube-dl] / youtube_dl / extractor / showroomlive.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 from .common import InfoExtractor
5 from ..utils import ExtractorError, compat_urlparse
6
7
class ShowroomLiveIE(InfoExtractor):
    """Extractor for live broadcasts on showroom-live.com.

    A room can only be extracted while its profile reports it as on air
    (``is_onlive``); otherwise an ExtractorError is raised.
    """

    _VALID_URL = r'https?://(?:www\.)?showroom-live\.com/(?P<id>[0-9a-zA-Z_]+)'
    _TEST = {
        'url': 'https://www.showroom-live.com/48_Nana_Okada',
        'skip': 'Only live broadcasts, can\'t predict test case.',
        'info_dict': {
            'id': '48_Nana_Okada',
            'ext': 'mp4',
            'uploader_id': '48_Nana_Okada',
        }
    }

    # First path components that match _VALID_URL but are site sections,
    # not broadcaster rooms.
    _NON_ROOM_PATHS = (
        'onlive', 'timetable', 'event', 'campaign', 'news', 'ranking')

    def _real_extract(self, url):
        """Build the info dict for the live broadcast found at *url*.

        Raises ExtractorError (flagged as expected, since neither case is an
        extractor bug) when the URL points at a known non-room page or when
        the room is not currently live.
        """
        broadcaster_id = self._match_id(url)

        # There is no showroom on these pages.
        if broadcaster_id in self._NON_ROOM_PATHS:
            raise ExtractorError(
                'URL %s does not contain a showroom' % url, expected=True)

        # The numeric room id is only available from the room's web page.
        webpage = self._download_webpage(url, broadcaster_id)
        room_id = self._search_regex(
            r'profile\?room_id\=(\d+)', webpage, 'room_id')
        profile_url = compat_urlparse.urljoin(
            url, '/api/room/profile?room_id=%s' % room_id)
        room = self._download_json(profile_url, broadcaster_id)

        is_live = room.get('is_onlive')
        if not is_live:
            raise ExtractorError(
                '%s their showroom is not live' % broadcaster_id,
                expected=True)

        # Use `or` chains rather than dict.get defaults: the fields may be
        # present but empty (performer_name can be an empty string).
        uploader = room.get('performer_name') or broadcaster_id
        title = (
            room.get('room_name')
            or room.get('main_name')
            or "%s's Showroom" % uploader)

        # Fall back to the broadcaster id instead of emitting the literal
        # string 'None'. '%s' formatting keeps the id a text string under
        # unicode_literals, where str() would produce bytes on Python 2.
        live_id = room.get('live_id')
        video_id = broadcaster_id if live_id is None else '%s' % live_id

        return {
            'is_live': is_live,
            'id': video_id,
            'timestamp': room.get('current_live_started_at'),
            'uploader': uploader,
            'uploader_id': broadcaster_id,
            'title': title,
            'description': room.get('description'),
            'formats': self._extract_formats(url, broadcaster_id, room_id)
        }

    def _extract_formats(self, url, broadcaster_id, room_id):
        """Return the sorted list of formats for room *room_id*.

        *url* is the room page URL and is only used as the base for the
        streaming API endpoint. Streams of type ``hls`` and ``rtmp`` are
        handled; other types, and entries missing the fields needed to
        build a playable URL, are skipped.
        """
        api_url = compat_urlparse.urljoin(
            url, '/api/live/streaming_url?room_id=%s' % room_id)
        api_response = self._download_json(api_url, broadcaster_id)
        # `or []` also covers an explicit null in the JSON response.
        streams = api_response.get('streaming_url_list') or []

        formats = []
        for stream in streams:
            stream_url = stream.get('url')
            if not stream_url:
                continue

            stream_type = stream.get('type')
            if stream_type == 'hls':
                formats.extend(self._extract_m3u8_formats(
                    stream_url,
                    broadcaster_id,
                    ext='mp4',
                    m3u8_id='hls',
                    preference=stream.get('quality', 100),
                    live=True
                ))
            elif stream_type == 'rtmp':
                stream_name = stream.get('stream_name')
                if not stream_name:
                    # Without a stream name the RTMP URL cannot be built.
                    continue
                formats.append({
                    'url': stream_url + '/' + stream_name,
                    'format_id': 'rtmp',
                    'protocol': 'rtmp',
                    'ext': 'flv',
                    'preference': stream.get('quality', 100),
                    'format_note': stream.get('label')
                })

        self._sort_formats(formats)
        return formats