[hitbox] add support for live streams
[youtube-dl] / youtube_dl / extractor / hitbox.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3 import re
4
5 from .common import InfoExtractor
6 from ..utils import (
7     unified_strdate,
8 )
9
10
class HitboxIE(InfoExtractor):
    """Information extractor for recorded hitbox.tv videos (``/video/<id>``).

    Metadata is read from the ``api/media`` JSON endpoint and the stream URL
    from the ``api/player/config`` JSON endpoint.
    """

    _VALID_URL = r'https?://(?:www\.)?hitbox\.tv/video/(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://www.hitbox.tv/video/203213',
        'info_dict': {
            'id': '203213',
            'title': 'hitbox @ gamescom, Sub Button Hype extended, Giveaway - hitbox News Update with Oxy',
            'alt_title': 'hitboxlive - Aug 9th #6',
            'description': '\n',
            'ext': 'mp4',
            # Raw string: '\.' is not a valid escape in a normal literal.
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 215,
            'resolution': 'HD 720p',
            'uploader_id': 'hitboxlive',
            'view_count': int,
            'upload_date': '20140809',
            'categories': ['Live Show'],
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }

    @staticmethod
    def _to_int_or_none(value):
        """Return ``value`` truncated to an int, or None if it is not numeric.

        Accepts ints, floats and numeric strings (including strings with a
        fractional part, which are truncated).
        """
        try:
            return int(float(value))
        except (TypeError, ValueError, OverflowError):
            return None

    def _extract_metadata(self, url, video_id):
        """Download the media description and build the base info dict.

        ``url`` is the API endpoint prefix; ``video_id`` is appended to it.
        Returns a dict with the id, titles, description, thumbnails,
        duration, uploader, view count, upload date and categories.
        Optional fields that the API does not provide are set to None (or
        left out of the list-valued fields) instead of raising.
        """
        thumb_base = 'https://edge.sf.hitbox.tv'
        metadata = self._download_json(
            '%s/%s' % (url, video_id), video_id
        )

        # The response is keyed by media type; recorded videos and live
        # streams also keep their date under different field names.
        date = 'media_live_since'
        media_type = 'livestream'
        if metadata.get('media_type') == 'video':
            media_type = 'video'
            date = 'media_date_added'

        video_meta = metadata.get(media_type, [])[0]

        # unified_strdate is only called with an actual value; a missing
        # date simply yields no upload_date.
        raw_date = video_meta.get(date)
        upload_date = unified_strdate(raw_date) if raw_date else None

        category = video_meta.get('category_name')
        categories = [category] if category else []

        # Only emit thumbnails whose path is actually present, so a missing
        # field cannot break the string concatenation.
        thumbs = []
        for key, width, height in (
                ('media_thumbnail', 320, 180),
                ('media_thumbnail_large', 768, 432)):
            path = video_meta.get(key)
            if path:
                thumbs.append({
                    'url': thumb_base + path,
                    'width': width,
                    'height': height,
                })

        return {
            'id': video_id,
            'title': video_meta.get('media_status'),
            'alt_title': video_meta.get('media_title'),
            'description': video_meta.get('media_description_md'),
            'ext': 'mp4',
            'thumbnails': thumbs,
            'duration': self._to_int_or_none(video_meta.get('media_duration')),
            'uploader_id': video_meta.get('media_user_name'),
            'view_count': self._to_int_or_none(video_meta.get('media_views')),
            'upload_date': upload_date,
            'categories': categories,
        }

    def _real_extract(self, url):
        """Extract a recorded video: metadata plus the m3u8 stream URL."""
        video_id = self._match_id(url)

        metadata = self._extract_metadata(
            'https://www.hitbox.tv/api/media/video',
            video_id
        )

        player_config = self._download_json(
            'https://www.hitbox.tv/api/player/config/video/%s' % (video_id),
            video_id
        )

        clip = player_config.get('clip') or {}
        # The resolution label is optional; take it from the first listed
        # bitrate when there is one.
        bitrates = clip.get('bitrates') or []
        res = bitrates[0].get('label') if bitrates else None

        metadata['resolution'] = res
        metadata['url'] = clip.get('url')
        metadata['protocol'] = 'm3u8'

        return metadata
101
102
class HitboxLiveIE(HitboxIE):
    """Information extractor for hitbox.tv live channels.

    Matches any hitbox.tv path that does not start with ``video`` and
    reuses ``HitboxIE._extract_metadata`` for the channel metadata.
    """

    _VALID_URL = r'https?://(?:www\.)?hitbox\.tv/(?!video)(?P<id>.+)'
    _TEST = {
        'url': 'http://www.hitbox.tv/dimak',
        'info_dict': {
            'id': 'dimak',
            'ext': 'mp4',
            'description': str,
            'upload_date': str,
            'title': str,
            'uploader_id': 'Dimak',
        },
        'params': {
            # live
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Extract a live stream: metadata plus one format per CDN bitrate.

        Each distinct CDN connection URL contributes one format per listed
        bitrate, except the entry labelled 'Auto'. CDN entries without a
        connection URL are skipped.
        """
        video_id = self._match_id(url)

        metadata = self._extract_metadata(
            'https://www.hitbox.tv/api/media/live',
            video_id
        )

        player_config = self._download_json(
            'https://www.hitbox.tv/api/player/config/live/%s' % (video_id),
            video_id
        )

        formats = []
        seen_servers = set()
        for cdn in player_config.get('cdns') or []:
            base_url = cdn.get('netConnectionUrl')
            # Skip entries without a server and servers already processed.
            if not base_url or base_url in seen_servers:
                continue
            seen_servers.add(base_url)

            # The last two labels of the host name are used as a short
            # format note; it stays None when the URL has no sub-domain.
            host_match = re.search(r'.+\.([^\.]+\.[^\./]+)/.+', base_url)
            host = host_match.group(1) if host_match else None

            for stream in cdn.get('bitrates') or []:
                label = stream.get('label')
                if label == 'Auto':
                    continue
                formats.append({
                    'url': '%s/%s' % (base_url, stream.get('url')),
                    'ext': 'mp4',
                    'vbr': stream.get('bitrate'),
                    'resolution': label,
                    'rtmp_live': True,
                    'format_note': host,
                    'page_url': url,
                    'player_url': 'http://www.hitbox.tv/static/player/flowplayer/flowplayer.commercial-3.2.16.swf',
                })

        self._sort_formats(formats)
        metadata['formats'] = formats
        metadata['is_live'] = True
        metadata['title'] = self._live_title(metadata.get('title'))
        return metadata