[youtube] Fix extraction.
[youtube-dl] / youtube_dl / extractor / beampro.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 from .common import InfoExtractor
5 from ..utils import (
6     ExtractorError,
7     clean_html,
8     compat_str,
9     float_or_none,
10     int_or_none,
11     parse_iso8601,
12     try_get,
13     urljoin,
14 )
15
16
class BeamProBaseIE(InfoExtractor):
    """Constants and helpers shared by the beam.pro / mixer.com extractors."""

    # Root of the REST API queried by the subclasses.
    _API_BASE = 'https://mixer.com/api/v1'
    # Maps a channel's 'audience' value to the reported age limit.
    _RATINGS = {'family': 0, 'teen': 13, '18+': 18}

    def _extract_channel_info(self, chan):
        """Collect the uploader-related info fields from a channel dict.

        The uploader name is taken from 'token', falling back to the nested
        user's 'username'; the uploader id is taken from 'userId', falling
        back to the nested user's 'id'.

        Returns a dict with the keys 'uploader', 'uploader_id' and
        'age_limit'.
        """
        user_id = chan.get('userId')
        if not user_id:
            user_id = try_get(chan, lambda x: x['user']['id'])

        uploader = chan.get('token')
        if not uploader:
            uploader = try_get(
                chan, lambda x: x['user']['username'], compat_str)

        # Only stringify an id that was actually found.
        uploader_id = None
        if user_id:
            uploader_id = compat_str(user_id)

        # An audience value missing from _RATINGS yields None.
        age_limit = self._RATINGS.get(chan.get('audience'))

        return {
            'uploader': uploader,
            'uploader_id': uploader_id,
            'age_limit': age_limit,
        }
29
30
class BeamProLiveIE(BeamProBaseIE):
    """Extractor for live channel pages (URLs without a ``vod`` parameter)."""

    IE_NAME = 'Mixer:live'
    _VALID_URL = r'https?://(?:\w+\.)?(?:beam\.pro|mixer\.com)/(?P<id>[^/?#&]+)'
    _TEST = {
        'url': 'http://mixer.com/niterhayven',
        'info_dict': {
            'id': '261562',
            'ext': 'mp4',
            'title': 'Introducing The Witcher 3 //  The Grind Starts Now!',
            'description': 'md5:0b161ac080f15fe05d18a07adb44a74d',
            'thumbnail': r're:https://.*\.jpg$',
            'timestamp': 1483477281,
            'upload_date': '20170103',
            'uploader': 'niterhayven',
            'uploader_id': '373396',
            'age_limit': 18,
            'is_live': True,
            'view_count': int,
        },
        'skip': 'niterhayven is offline',
        'params': {
            'skip_download': True,
        },
    }

    # Filled in with (channel id, manifest kind) at extraction time.
    _MANIFEST_URL_TEMPLATE = '%s/channels/%%s/manifest.%%s' % BeamProBaseIE._API_BASE

    @classmethod
    def suitable(cls, url):
        """Decline URLs that BeamProVodIE accepts; otherwise use the default check."""
        if BeamProVodIE.suitable(url):
            return False
        return super(BeamProLiveIE, cls).suitable(url)

    def _real_extract(self, url):
        """Fetch the channel record and build the info dict for its live stream.

        Raises an expected ExtractorError when the API explicitly reports the
        channel as not online.
        """
        channel_name = self._match_id(url)

        api_url = '%s/channels/%s' % (self._API_BASE, channel_name)
        chan = self._download_json(api_url, channel_name)

        # Only an explicit False counts as offline; a missing flag does not.
        if chan.get('online') is False:
            raise ExtractorError(
                '{0} is offline'.format(channel_name), expected=True)

        channel_id = chan['id']

        # Both manifest downloads are non-fatal.
        hls_url = self._MANIFEST_URL_TEMPLATE % (channel_id, 'm3u8')
        formats = self._extract_m3u8_formats(
            hls_url, channel_name, ext='mp4', m3u8_id='hls', fatal=False)

        smil_url = self._MANIFEST_URL_TEMPLATE % (channel_id, 'smil')
        smil_formats = self._extract_smil_formats(
            smil_url, channel_name, fatal=False)
        formats.extend(smil_formats)

        self._sort_formats(formats)

        video_id = compat_str(chan.get('id') or channel_name)
        title = self._live_title(chan.get('name') or channel_name)
        description = clean_html(chan.get('description'))
        thumbnail = try_get(
            chan, lambda x: x['thumbnail']['url'], compat_str)
        timestamp = parse_iso8601(chan.get('updatedAt'))
        view_count = int_or_none(chan.get('viewersTotal'))

        info = {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'timestamp': timestamp,
            'is_live': True,
            'view_count': view_count,
            'formats': formats,
        }
        # Add uploader / uploader_id / age_limit derived from the channel.
        info.update(self._extract_channel_info(chan))

        return info
98
99
class BeamProVodIE(BeamProBaseIE):
    """Extractor for recorded broadcasts (URLs carrying a ``vod=<id>`` parameter)."""

    IE_NAME = 'Mixer:vod'
    _VALID_URL = r'https?://(?:\w+\.)?(?:beam\.pro|mixer\.com)/[^/?#&]+\?.*?\bvod=(?P<id>[^?#&]+)'
    _TESTS = [{
        'url': 'https://mixer.com/willow8714?vod=2259830',
        'md5': 'b2431e6e8347dc92ebafb565d368b76b',
        'info_dict': {
            'id': '2259830',
            'ext': 'mp4',
            'title': 'willow8714\'s Channel',
            'duration': 6828.15,
            'thumbnail': r're:https://.*source\.png$',
            'timestamp': 1494046474,
            'upload_date': '20170506',
            'uploader': 'willow8714',
            'uploader_id': '6085379',
            'age_limit': 13,
            'view_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://mixer.com/streamer?vod=IxFno1rqC0S_XJ1a2yGgNw',
        'only_matching': True,
    }, {
        'url': 'https://mixer.com/streamer?vod=Rh3LY0VAqkGpEQUe2pN-ig',
        'only_matching': True,
    }]

    @staticmethod
    def _extract_format(vod, vod_type):
        """Build the format entries for a single vod record.

        vod is one entry of the recording's 'vods' list and vod_type is its
        'format' value, which must be 'hls' or 'raw'.

        Returns a list holding one format dict, or an empty list when the
        vod has no 'baseUrl'.

        Raises ValueError for any other vod_type.
        """
        if not vod.get('baseUrl'):
            return []

        if vod_type == 'hls':
            filename, protocol = 'manifest.m3u8', 'm3u8_native'
        elif vod_type == 'raw':
            filename, protocol = 'source.mp4', 'https'
        else:
            # An explicit raise instead of `assert False`: asserts are removed
            # under `python -O`, which would let execution continue with
            # `filename` and `protocol` unbound.
            raise ValueError('Unsupported VOD format: %s' % vod_type)

        data = vod.get('data')
        if not isinstance(data, dict):
            data = {}

        # The height is appended to the format id only when it is a string.
        format_id = [vod_type]
        if isinstance(data.get('Height'), compat_str):
            format_id.append('%sp' % data['Height'])

        return [{
            'url': urljoin(vod['baseUrl'], filename),
            'format_id': '-'.join(format_id),
            'ext': 'mp4',
            'protocol': protocol,
            'width': int_or_none(data.get('Width')),
            'height': int_or_none(data.get('Height')),
            'fps': int_or_none(data.get('Fps')),
            'tbr': int_or_none(data.get('Bitrate'), 1000),
        }]

    def _real_extract(self, url):
        """Fetch the recording metadata and build the info dict for the VOD.

        Raises an expected ExtractorError when the recording's state is not
        'AVAILABLE'.
        """
        vod_id = self._match_id(url)

        vod_info = self._download_json(
            '%s/recordings/%s' % (self._API_BASE, vod_id), vod_id)

        state = vod_info.get('state')
        if state != 'AVAILABLE':
            raise ExtractorError(
                'VOD %s is not available (state: %s)' % (vod_id, state),
                expected=True)

        formats = []
        thumbnail_url = None

        # A missing or malformed 'vods' field leaves the format list empty
        # instead of failing here with KeyError/TypeError.
        vods = vod_info.get('vods')
        if not isinstance(vods, list):
            vods = []

        for vod in vods:
            if not isinstance(vod, dict):
                continue
            vod_type = vod.get('format')
            if vod_type in ('hls', 'raw'):
                formats.extend(self._extract_format(vod, vod_type))
            elif vod_type == 'thumbnail':
                thumbnail_url = urljoin(vod.get('baseUrl'), 'source.png')

        self._sort_formats(formats)

        info = {
            'id': vod_id,
            'title': vod_info.get('name') or vod_id,
            'duration': float_or_none(vod_info.get('duration')),
            'thumbnail': thumbnail_url,
            'timestamp': parse_iso8601(vod_info.get('createdAt')),
            'view_count': int_or_none(vod_info.get('viewsTotal')),
            'formats': formats,
        }
        # Add uploader / uploader_id / age_limit derived from the channel.
        info.update(self._extract_channel_info(vod_info.get('channel') or {}))

        return info