Merge branch 'crooksandliars' of https://github.com/fstirlitz/youtube-dl into fstirli...
[youtube-dl] / youtube_dl / extractor / hitbox.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import re
5
6 from .common import InfoExtractor
7 from ..utils import (
8     clean_html,
9     parse_iso8601,
10     float_or_none,
11     int_or_none,
12     compat_str,
13     determine_ext,
14 )
15
16
class HitboxIE(InfoExtractor):
    """Extractor for recorded videos at ``hitbox.tv/video/<numeric id>``."""

    IE_NAME = 'hitbox'
    _VALID_URL = r'https?://(?:www\.)?hitbox\.tv/video/(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://www.hitbox.tv/video/203213',
        'info_dict': {
            'id': '203213',
            'title': 'hitbox @ gamescom, Sub Button Hype extended, Giveaway - hitbox News Update with Oxy',
            'alt_title': 'hitboxlive - Aug 9th #6',
            'description': '',
            'ext': 'mp4',
            # Raw string: '\.' is not a valid escape in a normal literal.
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 215.1666,
            'resolution': 'HD 720p',
            'uploader': 'hitboxlive',
            'view_count': int,
            'timestamp': 1407576133,
            'upload_date': '20140809',
            'categories': ['Live Show'],
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }

    def _extract_metadata(self, url, video_id):
        """Fetch the media API record and map it onto info-dict fields.

        ``url`` is the API endpoint prefix; ``video_id`` is appended to it as
        the last path segment and is also used as the download label.

        Returns a dict with the keys ``id``, ``title``, ``alt_title``,
        ``description``, ``ext``, ``thumbnails``, ``duration``, ``uploader``,
        ``view_count``, ``timestamp`` and ``categories``.  Callers add the
        stream URL / formats themselves.
        """
        thumb_base = 'https://edge.sf.hitbox.tv'
        metadata = self._download_json(
            '%s/%s' % (url, video_id), video_id)

        # The response is treated as a livestream record unless it declares
        # itself a video; the two kinds use different list and date keys.
        date = 'media_live_since'
        media_type = 'livestream'
        if metadata.get('media_type') == 'video':
            media_type = 'video'
            date = 'media_date_added'

        video_meta = metadata.get(media_type, [])[0]
        title = video_meta.get('media_status')
        alt_title = video_meta.get('media_title')
        description = clean_html(
            video_meta.get('media_description') or
            video_meta.get('media_description_md'))
        duration = float_or_none(video_meta.get('media_duration'))
        uploader = video_meta.get('media_user_name')
        views = int_or_none(video_meta.get('media_views'))
        timestamp = parse_iso8601(video_meta.get(date), ' ')

        # Avoid reporting [None] when the record carries no category.
        category = video_meta.get('category_name')
        categories = [category] if category else None

        # Only emit thumbnails whose path is present; concatenating a missing
        # (None) path onto thumb_base would raise TypeError.
        thumbs = []
        for key, width, height in (
                ('media_thumbnail', 320, 180),
                ('media_thumbnail_large', 768, 432)):
            thumb_path = video_meta.get(key)
            if thumb_path:
                thumbs.append({
                    'url': thumb_base + thumb_path,
                    'width': width,
                    'height': height,
                })

        return {
            'id': video_id,
            'title': title,
            'alt_title': alt_title,
            'description': description,
            'ext': 'mp4',
            'thumbnails': thumbs,
            'duration': duration,
            'uploader': uploader,
            'view_count': views,
            'timestamp': timestamp,
            'categories': categories,
        }

    def _real_extract(self, url):
        """Return the info dict for a recorded video URL.

        Combines the metadata from the media API with the stream URL and the
        label of the first listed bitrate from the player-config API.
        """
        video_id = self._match_id(url)

        metadata = self._extract_metadata(
            'https://www.hitbox.tv/api/media/video',
            video_id)

        player_config = self._download_json(
            'https://www.hitbox.tv/api/player/config/video/%s' % video_id,
            video_id)

        clip = player_config.get('clip')
        video_url = clip.get('url')
        # The resolution label is optional metadata: an absent or empty
        # bitrate list must not abort extraction with an IndexError.
        bitrates = clip.get('bitrates') or []
        res = bitrates[0].get('label') if bitrates else None

        metadata['resolution'] = res
        metadata['url'] = video_url
        metadata['protocol'] = 'm3u8'

        return metadata
108
109
class HitboxLiveIE(HitboxIE):
    """Extractor for live channels at ``hitbox.tv/<channel>``."""

    IE_NAME = 'hitbox:live'
    # Exclude only the exact path segment "video" (handled by HitboxIE), not
    # every channel whose name starts with it, and stop the id at the first
    # path/query/fragment delimiter so it can be used in API URLs as-is.
    _VALID_URL = r'https?://(?:www\.)?hitbox\.tv/(?!video(?:[/?#&]|$))(?P<id>[^/?#&]+)'
    _TEST = {
        'url': 'http://www.hitbox.tv/dimak',
        'info_dict': {
            'id': 'dimak',
            'ext': 'mp4',
            'description': 'md5:c9f80fa4410bc588d7faa40003fc7d0e',
            'timestamp': int,
            'upload_date': compat_str,
            'title': compat_str,
            'uploader': 'Dimak',
        },
        'params': {
            # live
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Return the info dict for a live channel URL.

        Metadata comes from the live media API; formats are collected from
        every distinct CDN entry of the live player-config API.  Streams
        labelled ``Auto`` or lacking a URL are skipped.  HLS streams are kept
        only when their URL is absolute (starts with ``http``); all other
        streams are joined onto the CDN's ``netConnectionUrl``.
        """
        video_id = self._match_id(url)

        metadata = self._extract_metadata(
            'https://www.hitbox.tv/api/media/live',
            video_id)

        player_config = self._download_json(
            'https://www.hitbox.tv/api/player/config/live/%s' % video_id,
            video_id)

        formats = []
        seen_servers = set()
        for cdn in player_config.get('cdns') or []:
            base_url = cdn.get('netConnectionUrl')
            # Each server contributes its streams only once.
            if base_url in seen_servers:
                continue
            seen_servers.add(base_url)

            # The last two dot-separated labels of the server host are used
            # as a format note; tolerate URLs that do not fit the pattern.
            host = None
            if base_url:
                host_match = re.search(
                    r'.+\.([^\.]+\.[^\./]+)/.+', base_url)
                if host_match:
                    host = host_match.group(1)

            for stream in cdn.get('bitrates') or []:
                label = stream.get('label')
                if label == 'Auto':
                    continue
                stream_url = stream.get('url')
                if not stream_url:
                    continue
                bitrate = int_or_none(stream.get('bitrate'))
                if stream.get('provider') == 'hls' or determine_ext(stream_url) == 'm3u8':
                    if not stream_url.startswith('http'):
                        continue
                    formats.append({
                        'url': stream_url,
                        'ext': 'mp4',
                        'tbr': bitrate,
                        'format_note': label,
                        'rtmp_live': True,
                    })
                elif base_url:
                    # Non-HLS stream names are relative to the CDN base URL,
                    # so they are unusable without one.
                    formats.append({
                        'url': '%s/%s' % (base_url, stream_url),
                        'ext': 'mp4',
                        'tbr': bitrate,
                        'rtmp_live': True,
                        'format_note': host,
                        'page_url': url,
                        'player_url': 'http://www.hitbox.tv/static/player/flowplayer/flowplayer.commercial-3.2.16.swf',
                    })

        self._sort_formats(formats)
        metadata['formats'] = formats
        metadata['is_live'] = True
        metadata['title'] = self._live_title(metadata.get('title'))
        return metadata