ff797438dec12303aab55af0e29aac8bd35229c5
[youtube-dl] / youtube_dl / extractor / hitbox.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import re
5
6 from .common import InfoExtractor
from ..utils import (
    clean_html,
    compat_str,
    determine_ext,
    ExtractorError,
    float_or_none,
    int_or_none,
    parse_iso8601,
)
15
16
class HitboxIE(InfoExtractor):
    """Extractor for recorded videos at hitbox.tv/video/<numeric id>.

    Stream URLs are read from the player config API and the descriptive
    fields from the media API.  The media API base URL is passed into
    _extract_metadata as an argument rather than hard-coded there.
    """

    IE_NAME = 'hitbox'
    _VALID_URL = r'https?://(?:www\.)?hitbox\.tv/video/(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://www.hitbox.tv/video/203213',
        'info_dict': {
            'id': '203213',
            'title': 'hitbox @ gamescom, Sub Button Hype extended, Giveaway - hitbox News Update with Oxy',
            'alt_title': 'hitboxlive - Aug 9th #6',
            'description': '',
            'ext': 'mp4',
            # Raw string: '\.' is not a valid escape in a normal literal
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 215.1666,
            'resolution': 'HD 720p',
            'uploader': 'hitboxlive',
            'view_count': int,
            'timestamp': 1407576133,
            'upload_date': '20140809',
            'categories': ['Live Show'],
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }

    def _extract_metadata(self, url, video_id):
        """Download the media metadata JSON and turn it into an info dict.

        url is the media API base; the document is fetched from
        '<url>/<video_id>'.  The returned dict carries id, title, alt_title,
        description, ext, thumbnails, duration, uploader, view_count,
        timestamp and categories, but no 'formats' (callers add those).

        Raises ExtractorError when the response holds no entry for the media.
        """
        thumb_base = 'https://edge.sf.hitbox.tv'
        metadata = self._download_json(
            '%s/%s' % (url, video_id), video_id,
            'Downloading metadata JSON')

        # The response is treated as a livestream unless it says 'video';
        # the two kinds use a different list key and a different date field.
        date = 'media_live_since'
        media_type = 'livestream'
        if metadata.get('media_type') == 'video':
            media_type = 'video'
            date = 'media_date_added'

        entries = metadata.get(media_type)
        if not entries:
            # Previously an empty or missing list surfaced as a bare
            # IndexError/TypeError from the [0] lookup.
            raise ExtractorError(
                'No %s metadata found for %s' % (media_type, video_id))
        video_meta = entries[0]

        title = video_meta.get('media_status')
        alt_title = video_meta.get('media_title')
        description = clean_html(
            video_meta.get('media_description') or
            video_meta.get('media_description_md'))
        duration = float_or_none(video_meta.get('media_duration'))
        uploader = video_meta.get('media_user_name')
        views = int_or_none(video_meta.get('media_views'))
        timestamp = parse_iso8601(video_meta.get(date), ' ')

        # Report no categories instead of the meaningless [None]
        category = video_meta.get('category_name')
        categories = [category] if category else None

        # Thumbnail paths are appended to thumb_base; a missing path is
        # skipped because concatenating None would raise TypeError.
        thumbs = []
        for key, width, height in (
                ('media_thumbnail', 320, 180),
                ('media_thumbnail_large', 768, 432)):
            thumb_path = video_meta.get(key)
            if not thumb_path:
                continue
            thumbs.append({
                'url': thumb_base + thumb_path,
                'width': width,
                'height': height,
            })

        return {
            'id': video_id,
            'title': title,
            'alt_title': alt_title,
            'description': description,
            'ext': 'mp4',
            'thumbnails': thumbs,
            'duration': duration,
            'uploader': uploader,
            'view_count': views,
            'timestamp': timestamp,
            'categories': categories,
        }

    def _real_extract(self, url):
        """Build the info dict (metadata plus formats) for a recorded video."""
        video_id = self._match_id(url)

        player_config = self._download_json(
            'https://www.hitbox.tv/api/player/config/video/%s' % video_id,
            video_id, 'Downloading video JSON')

        # Tolerate a config without 'clip' or 'bitrates' instead of failing
        # with KeyError/TypeError; the format list is then simply empty.
        clip = player_config.get('clip') or {}

        formats = []
        for video in clip.get('bitrates') or []:
            label = video.get('label')
            # The 'Auto' entry is skipped
            if label == 'Auto':
                continue
            video_url = video.get('url')
            if not video_url:
                continue
            bitrate = int_or_none(video.get('bitrate'))
            if determine_ext(video_url) == 'm3u8':
                # Only m3u8 URLs that start with 'http' are kept
                if not video_url.startswith('http'):
                    continue
                formats.append({
                    'url': video_url,
                    'ext': 'mp4',
                    'tbr': bitrate,
                    'format_note': label,
                    'protocol': 'm3u8_native',
                })
            else:
                formats.append({
                    'url': video_url,
                    'tbr': bitrate,
                    'format_note': label,
                })
        # NOTE(review): _sort_formats is defined on InfoExtractor (not visible
        # here); presumably it reports an empty format list - confirm.
        self._sort_formats(formats)

        metadata = self._extract_metadata(
            'https://www.hitbox.tv/api/media/video',
            video_id)
        metadata['formats'] = formats

        return metadata
129
130
class HitboxLiveIE(HitboxIE):
    """Extractor for live channels at hitbox.tv/<channel>.

    Formats are collected from every CDN listed in the live player config;
    the descriptive fields come from the inherited _extract_metadata.
    """

    IE_NAME = 'hitbox:live'
    # The (?!video) lookahead keeps hitbox.tv/video/<id> URLs out of this
    # extractor.  NOTE(review): it also rejects any channel whose name starts
    # with "video" - confirm whether that is intended.
    _VALID_URL = r'https?://(?:www\.)?hitbox\.tv/(?!video)(?P<id>.+)'
    _TEST = {
        'url': 'http://www.hitbox.tv/dimak',
        'info_dict': {
            'id': 'dimak',
            'ext': 'mp4',
            'description': 'md5:c9f80fa4410bc588d7faa40003fc7d0e',
            'timestamp': int,
            'upload_date': compat_str,
            'title': compat_str,
            'uploader': 'Dimak',
        },
        'params': {
            # live
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Build the info dict (metadata plus formats) for a live channel."""
        video_id = self._match_id(url)

        player_config = self._download_json(
            'https://www.hitbox.tv/api/player/config/live/%s' % video_id,
            video_id)

        formats = []
        seen_servers = set()
        # A config without 'cdns' yields no formats instead of a TypeError
        for cdn in player_config.get('cdns') or []:
            # Subscribe URLs are not playable
            if cdn.get('rtmpSubscribe') is True:
                continue

            base_url = cdn.get('netConnectionUrl')
            host = None
            if base_url:
                # Each distinct server is processed once
                if base_url in seen_servers:
                    continue
                seen_servers.add(base_url)
                # Last two dot-separated labels of the host part, used as
                # the format note of RTMP formats; None when the URL does
                # not have the expected shape (previously AttributeError).
                mobj = re.search(r'.+\.([^\.]+\.[^\./]+)/.+', base_url)
                host = mobj.group(1) if mobj else None

            for stream in cdn.get('bitrates') or []:
                label = stream.get('label')
                # The 'Auto' entry is skipped
                if label == 'Auto':
                    continue
                stream_url = stream.get('url')
                if not stream_url:
                    continue
                bitrate = int_or_none(stream.get('bitrate'))
                if stream.get('provider') == 'hls' or determine_ext(stream_url) == 'm3u8':
                    # Only HLS URLs that start with 'http' are kept
                    if not stream_url.startswith('http'):
                        continue
                    # NOTE(review): 'rtmp_live' on an HLS format is kept
                    # from the original code - confirm it is needed.
                    formats.append({
                        'url': stream_url,
                        'ext': 'mp4',
                        'tbr': bitrate,
                        'format_note': label,
                        'rtmp_live': True,
                    })
                else:
                    # Non-HLS stream URLs are joined onto the server URL,
                    # so they cannot be built without one.
                    if not base_url:
                        continue
                    formats.append({
                        'url': '%s/%s' % (base_url, stream_url),
                        'ext': 'mp4',
                        'tbr': bitrate,
                        'rtmp_live': True,
                        'format_note': host,
                        'page_url': url,
                        'player_url': 'http://www.hitbox.tv/static/player/flowplayer/flowplayer.commercial-3.2.16.swf',
                    })
        # NOTE(review): _sort_formats is defined on InfoExtractor (not visible
        # here); presumably it reports an empty format list - confirm.
        self._sort_formats(formats)

        metadata = self._extract_metadata(
            'https://www.hitbox.tv/api/media/live',
            video_id)
        metadata['formats'] = formats
        metadata['is_live'] = True
        metadata['title'] = self._live_title(metadata.get('title'))

        return metadata