[mixer:vod] Relax _VALID_URL (closes #21531) (#21536)
[youtube-dl] / youtube_dl / extractor / beampro.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 from .common import InfoExtractor
5 from ..utils import (
6     ExtractorError,
7     clean_html,
8     compat_str,
9     float_or_none,
10     int_or_none,
11     parse_iso8601,
12     try_get,
13     urljoin,
14 )
15
16
class BeamProBaseIE(InfoExtractor):
    """Shared pieces for the Mixer (formerly Beam) extractors.

    Holds the API root used to build request URLs and the helper that
    turns a channel JSON object into uploader-related info fields.
    """

    _API_BASE = 'https://mixer.com/api/v1'
    # Channel ``audience`` label -> value reported as ``age_limit``.
    _RATINGS = {'family': 0, 'teen': 13, '18+': 18}

    def _extract_channel_info(self, chan):
        """Build the uploader / age-limit fields from a channel dict.

        ``chan`` is read with ``.get`` and ``try_get`` only, so absent keys
        yield ``None`` values rather than errors.

        Returns a dict with the keys ``uploader``, ``uploader_id`` and
        ``age_limit``.
        """
        # Prefer the flat ``userId`` field; fall back to the nested user.
        owner_id = chan.get('userId')
        if not owner_id:
            owner_id = try_get(chan, lambda c: c['user']['id'])

        # Prefer the channel token; fall back to the nested username,
        # which is only accepted when it is a string.
        owner_name = chan.get('token')
        if not owner_name:
            owner_name = try_get(
                chan, lambda c: c['user']['username'], compat_str)

        if owner_id:
            uploader_id = compat_str(owner_id)
        else:
            uploader_id = None

        audience = chan.get('audience')

        return {
            'uploader': owner_name,
            'uploader_id': uploader_id,
            # Unknown or missing audience labels map to None.
            'age_limit': self._RATINGS.get(audience),
        }
29
30
class BeamProLiveIE(BeamProBaseIE):
    """Extractor for live Mixer (formerly Beam) channel streams.

    The URL's first path segment is taken as the channel name, the channel
    is looked up under ``<_API_BASE>/channels/<name>``, and formats are
    collected from the channel's m3u8 and smil manifests.
    """

    IE_NAME = 'Mixer:live'
    _VALID_URL = r'https?://(?:\w+\.)?(?:beam\.pro|mixer\.com)/(?P<id>[^/?#&]+)'
    _TEST = {
        'url': 'http://mixer.com/niterhayven',
        'info_dict': {
            'id': '261562',
            'ext': 'mp4',
            'title': 'Introducing The Witcher 3 //  The Grind Starts Now!',
            'description': 'md5:0b161ac080f15fe05d18a07adb44a74d',
            'thumbnail': r're:https://.*\.jpg$',
            'timestamp': 1483477281,
            'upload_date': '20170103',
            'uploader': 'niterhayven',
            'uploader_id': '373396',
            'age_limit': 18,
            'is_live': True,
            'view_count': int,
        },
        'skip': 'niterhayven is offline',
        'params': {
            'skip_download': True,
        },
    }

    # Two placeholders survive the first interpolation: channel id and
    # manifest kind (e.g. 'm3u8' or 'smil').
    _MANIFEST_URL_TEMPLATE = '%s/channels/%%s/manifest.%%s' % BeamProBaseIE._API_BASE

    @classmethod
    def suitable(cls, url):
        """Reject URLs the VOD extractor accepts; otherwise use the default check."""
        # A ``?vod=...`` URL starts with a channel path as well, so the VOD
        # extractor is given priority over this one.
        if BeamProVodIE.suitable(url):
            return False
        return super(BeamProLiveIE, cls).suitable(url)

    def _real_extract(self, url):
        """Return the info dict for the live stream of the channel in ``url``.

        Raises ExtractorError (marked as expected) when the API explicitly
        reports the channel as not online.
        """
        channel_name = self._match_id(url)

        channel_api_url = '%s/channels/%s' % (self._API_BASE, channel_name)
        chan = self._download_json(channel_api_url, channel_name)

        # Only an explicit ``False`` counts as offline; a missing flag does not.
        if chan.get('online') is False:
            raise ExtractorError(
                '{0} is offline'.format(channel_name), expected=True)

        channel_id = chan['id']

        # Both manifest fetches are non-fatal.
        formats = self._extract_m3u8_formats(
            self._MANIFEST_URL_TEMPLATE % (channel_id, 'm3u8'),
            channel_name, ext='mp4', m3u8_id='hls', fatal=False)
        smil_formats = self._extract_smil_formats(
            self._MANIFEST_URL_TEMPLATE % (channel_id, 'smil'),
            channel_name, fatal=False)
        formats.extend(smil_formats)
        self._sort_formats(formats)

        video_id = compat_str(chan.get('id') or channel_name)
        title = self._live_title(chan.get('name') or channel_name)
        thumbnail = try_get(
            chan, lambda c: c['thumbnail']['url'], compat_str)

        info = {
            'id': video_id,
            'title': title,
            'description': clean_html(chan.get('description')),
            'thumbnail': thumbnail,
            'timestamp': parse_iso8601(chan.get('updatedAt')),
            'is_live': True,
            'view_count': int_or_none(chan.get('viewersTotal')),
            'formats': formats,
        }
        # Add uploader, uploader_id and age_limit from the same channel object.
        info.update(self._extract_channel_info(chan))

        return info
98
99
class BeamProVodIE(BeamProBaseIE):
    """Extractor for recorded Mixer (formerly Beam) broadcasts.

    Matches channel URLs that carry a ``vod=<id>`` query parameter and
    reads the recording from ``<_API_BASE>/recordings/<id>``.
    """

    IE_NAME = 'Mixer:vod'
    _VALID_URL = r'https?://(?:\w+\.)?(?:beam\.pro|mixer\.com)/[^/?#&]+\?.*?\bvod=(?P<id>\w+)'
    _TESTS = [{
        'url': 'https://mixer.com/willow8714?vod=2259830',
        'md5': 'b2431e6e8347dc92ebafb565d368b76b',
        'info_dict': {
            'id': '2259830',
            'ext': 'mp4',
            'title': 'willow8714\'s Channel',
            'duration': 6828.15,
            'thumbnail': r're:https://.*source\.png$',
            'timestamp': 1494046474,
            'upload_date': '20170506',
            'uploader': 'willow8714',
            'uploader_id': '6085379',
            'age_limit': 13,
            'view_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://mixer.com/streamer?vod=IxFno1rqC0S_XJ1a2yGgNw',
        'only_matching': True,
    }]

    @staticmethod
    def _extract_format(vod, vod_type):
        """Turn one entry of the recording's ``vods`` list into formats.

        ``vod`` is the entry dict and ``vod_type`` its format kind, which
        must be ``'hls'`` or ``'raw'``.

        Returns a list holding a single format dict, or an empty list when
        the entry has no ``baseUrl``.

        Raises ValueError for any other ``vod_type`` on an entry that does
        have a ``baseUrl``.
        """
        if not vod.get('baseUrl'):
            return []

        if vod_type == 'hls':
            filename, protocol = 'manifest.m3u8', 'm3u8_native'
        elif vod_type == 'raw':
            filename, protocol = 'source.mp4', 'https'
        else:
            # Raised explicitly rather than via ``assert False``: asserts
            # are removed under ``python -O``, which would let execution
            # continue with ``filename`` and ``protocol`` unbound.
            raise ValueError('Unsupported VOD format type: %r' % (vod_type,))

        # Anything other than a dict is treated as "no metadata".
        data = vod.get('data')
        if not isinstance(data, dict):
            data = {}

        # The height suffix is added only when Height is a string.
        format_id = [vod_type]
        if isinstance(data.get('Height'), compat_str):
            format_id.append('%sp' % data['Height'])

        return [{
            'url': urljoin(vod['baseUrl'], filename),
            'format_id': '-'.join(format_id),
            'ext': 'mp4',
            'protocol': protocol,
            'width': int_or_none(data.get('Width')),
            'height': int_or_none(data.get('Height')),
            'fps': int_or_none(data.get('Fps')),
            # NOTE(review): 1000 is presumably int_or_none's scale divisor
            # (bit/s -> kbit/s) -- confirm against utils.
            'tbr': int_or_none(data.get('Bitrate'), 1000),
        }]

    def _real_extract(self, url):
        """Return the info dict for the recording referenced by ``url``.

        Raises ExtractorError (marked as expected) when the recording's
        ``state`` is anything other than ``'AVAILABLE'``.
        """
        vod_id = self._match_id(url)

        vod_info = self._download_json(
            '%s/recordings/%s' % (self._API_BASE, vod_id), vod_id)

        state = vod_info.get('state')
        if state != 'AVAILABLE':
            raise ExtractorError(
                'VOD %s is not available (state: %s)' % (vod_id, state),
                expected=True)

        formats = []
        thumbnail_url = None

        # A missing or null ``vods`` list is treated as empty, and entries
        # that are not dicts are skipped, so a malformed response does not
        # fail with a bare KeyError/AttributeError here.
        for vod in vod_info.get('vods') or []:
            if not isinstance(vod, dict):
                continue
            vod_type = vod.get('format')
            if vod_type in ('hls', 'raw'):
                formats.extend(self._extract_format(vod, vod_type))
            elif vod_type == 'thumbnail':
                thumbnail_url = urljoin(vod.get('baseUrl'), 'source.png')

        self._sort_formats(formats)

        info = {
            'id': vod_id,
            'title': vod_info.get('name') or vod_id,
            'duration': float_or_none(vod_info.get('duration')),
            'thumbnail': thumbnail_url,
            'timestamp': parse_iso8601(vod_info.get('createdAt')),
            'view_count': int_or_none(vod_info.get('viewsTotal')),
            'formats': formats,
        }
        # A missing channel object degrades to an empty dict, which leaves
        # the uploader fields as None.
        info.update(self._extract_channel_info(vod_info.get('channel') or {}))

        return info