[beam:vod] Add extractor
[youtube-dl] / youtube_dl / extractor / beampro.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 from .common import InfoExtractor
5 from ..utils import (
6     ExtractorError,
7     clean_html,
8     compat_str,
9     float_or_none,
10     int_or_none,
11     parse_iso8601,
12     try_get,
13     urljoin,
14 )
15
16
class BeamProBaseIE(InfoExtractor):
    """Shared helpers for the Beam extractors defined in this module."""

    # Maps the channel's 'audience' value to the 'age_limit' reported in the
    # result; audiences not listed here produce None.
    _RATINGS = {'family': 0, 'teen': 13, '18+': 18}

    def _extract_channel_info(self, chan):
        """Build the uploader-related fields from a channel dict.

        ``chan`` is read with ``.get`` for 'userId', 'token' and 'audience';
        the nested ``chan['user']`` entry is only consulted (through
        ``try_get``) when the corresponding top-level value is missing or
        falsy.

        Returns a dict with the keys 'uploader', 'uploader_id' and
        'age_limit'.
        """
        owner_id = chan.get('userId')
        if not owner_id:
            owner_id = try_get(chan, lambda x: x['user']['id'])

        owner_name = chan.get('token')
        if not owner_name:
            owner_name = try_get(
                chan, lambda x: x['user']['username'], compat_str)

        # A falsy id (None, 0, '') is reported as "unknown" rather than
        # being stringified.
        if owner_id:
            uploader_id = compat_str(owner_id)
        else:
            uploader_id = None

        audience = chan.get('audience')
        return {
            'uploader': owner_name,
            'uploader_id': uploader_id,
            'age_limit': self._RATINGS.get(audience),
        }
28
29
class BeamProLiveIE(BeamProBaseIE):
    """Extractor for live Beam channels (``beam.pro/<channel>``)."""

    IE_NAME = 'Beam:live'
    _VALID_URL = r'https?://(?:\w+\.)?beam\.pro/(?P<id>[^/?#&]+)'
    _TEST = {
        'url': 'http://www.beam.pro/niterhayven',
        'info_dict': {
            'id': '261562',
            'ext': 'mp4',
            'title': 'Introducing The Witcher 3 //  The Grind Starts Now!',
            'description': 'md5:0b161ac080f15fe05d18a07adb44a74d',
            'thumbnail': r're:https://.*\.jpg$',
            'timestamp': 1483477281,
            'upload_date': '20170103',
            'uploader': 'niterhayven',
            'uploader_id': '373396',
            'age_limit': 18,
            'is_live': True,
            'view_count': int,
        },
        'skip': 'niterhayven is offline',
        'params': {
            'skip_download': True,
        },
    }

    @classmethod
    def suitable(cls, url):
        """Return whether this extractor should handle ``url``.

        The live pattern also matches VOD URLs, so anything BeamProVodIE
        accepts is rejected here first.
        """
        if BeamProVodIE.suitable(url):
            return False
        return super(BeamProLiveIE, cls).suitable(url)

    def _real_extract(self, url):
        """Fetch the channel record and build the info dict for its stream.

        Raises ExtractorError (expected=True) when the channel record has
        'online' set to False. A channel record without an 'id' key raises
        KeyError.
        """
        channel_name = self._match_id(url)

        api_url = 'https://beam.pro/api/v1/channels/%s' % channel_name
        chan = self._download_json(api_url, channel_name)

        # Only an explicit False counts as offline; a missing 'online' key
        # lets extraction continue.
        if chan.get('online') is False:
            message = '{0} is offline'.format(channel_name)
            raise ExtractorError(message, expected=True)

        manifest_url = (
            'https://beam.pro/api/v1/channels/%s/manifest.m3u8' % chan['id'])
        formats = self._extract_m3u8_formats(
            manifest_url, channel_name,
            ext='mp4', m3u8_id='hls', fatal=False)
        self._sort_formats(formats)

        display_id = chan.get('id') or channel_name
        stream_title = chan.get('name') or channel_name

        info = {
            'id': compat_str(display_id),
            'title': self._live_title(stream_title),
            'description': clean_html(chan.get('description')),
            'thumbnail': try_get(
                chan, lambda x: x['thumbnail']['url'], compat_str),
            'timestamp': parse_iso8601(chan.get('updatedAt')),
            'is_live': True,
            'view_count': int_or_none(chan.get('viewersTotal')),
            'formats': formats,
        }
        # Adds 'uploader', 'uploader_id' and 'age_limit'.
        info.update(self._extract_channel_info(chan))

        return info
89
90
class BeamProVodIE(BeamProBaseIE):
    """Extractor for Beam recordings (``beam.pro/<channel>?vod=<id>``)."""

    IE_NAME = 'Beam:vod'
    _VALID_URL = r'https?://(?:\w+\.)?beam\.pro/[^/?#&]+.*[?&]vod=(?P<id>\d+)'
    _TEST = {
        'url': 'https://beam.pro/willow8714?vod=2259830',
        'md5': 'b2431e6e8347dc92ebafb565d368b76b',
        'info_dict': {
            'id': '2259830',
            'ext': 'mp4',
            'title': 'willow8714\'s Channel',
            'duration': 6828.15,
            'thumbnail': r're:https://.*source\.png$',
            'timestamp': 1494046474,
            'upload_date': '20170506',
            'uploader': 'willow8714',
            'uploader_id': '6085379',
            'age_limit': 13,
            'view_count': int,
        },
    }

    def _extract_format(self, vod, vod_type):
        """Turn one entry of the recording's 'vods' list into format dicts.

        ``vod`` is a dict describing one rendition; ``vod_type`` is its
        'format' value. Only 'hls' and 'raw' are understood.

        Returns a list holding a single format dict, or an empty list when
        the entry has no 'baseUrl' or ``vod_type`` is not supported.
        """
        if not vod.get('baseUrl'):
            return []

        if vod_type == 'hls':
            filename, protocol = 'manifest.m3u8', 'm3u8'
        elif vod_type == 'raw':
            filename, protocol = 'source.mp4', 'https'
        else:
            return []

        # The metadata block is optional; anything that is not a dict
        # (null, string, list, ...) is treated as "no metadata" instead of
        # crashing on the .get() calls below.
        data = vod.get('data')
        if not isinstance(data, dict):
            data = {}

        height = int_or_none(data.get('Height'))

        # Only tag the format id with a resolution when a usable height was
        # reported, so a null/empty 'Height' no longer yields 'hls-Nonep'.
        format_id = [vod_type]
        if height is not None:
            format_id.append('%sp' % height)

        return [{
            'url': urljoin(vod['baseUrl'], filename),
            'format_id': '-'.join(format_id),
            'ext': 'mp4',
            'protocol': protocol,
            'width': int_or_none(data.get('Width')),
            'height': height,
            'fps': int_or_none(data.get('Fps')),
            'tbr': int_or_none(data.get('Bitrate'), 1000),
        }]

    def _real_extract(self, url):
        """Fetch the recording record and build its info dict.

        Raises ExtractorError (expected=True) when the recording's 'state'
        is anything other than 'AVAILABLE'.
        """
        vod_id = self._match_id(url)

        vod_info = self._download_json(
            'https://beam.pro/api/v1/recordings/%s' % vod_id, vod_id)

        state = vod_info.get('state')
        if state != 'AVAILABLE':
            raise ExtractorError(
                'VOD %s is not available (state: %s)' % (vod_id, state), expected=True)

        formats = []
        thumbnail_url = None

        # A missing or null 'vods' list is treated as empty, and entries
        # that are not dicts are skipped, so malformed API output does not
        # surface as a bare KeyError/TypeError/AttributeError.
        for vod in vod_info.get('vods') or []:
            if not isinstance(vod, dict):
                continue
            vod_type = vod.get('format')
            if vod_type in ('hls', 'raw'):
                formats.extend(self._extract_format(vod, vod_type))
            elif vod_type == 'thumbnail':
                thumbnail_url = urljoin(vod.get('baseUrl'), 'source.png')

        # NOTE(review): _sort_formats is presumably responsible for
        # reporting an empty format list -- confirm against InfoExtractor.
        self._sort_formats(formats)

        info = {
            'id': vod_id,
            'title': vod_info.get('name') or vod_id,
            'duration': float_or_none(vod_info.get('duration')),
            'thumbnail': thumbnail_url,
            'timestamp': parse_iso8601(vod_info.get('createdAt')),
            'view_count': int_or_none(vod_info.get('viewsTotal')),
            'formats': formats,
        }

        # Adds 'uploader', 'uploader_id' and 'age_limit' from the embedded
        # channel record, if there is one.
        chan = vod_info.get('channel') or {}
        info.update(self._extract_channel_info(chan))

        return info
175
176         return info