Merge remote-tracking branch 'ivan/muxed-mtime'
[youtube-dl] / youtube_dl / extractor / hitbox.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import re
5
6 from .common import InfoExtractor
7 from ..utils import (
8     clean_html,
9     parse_iso8601,
10     float_or_none,
11     int_or_none,
12     compat_str,
13 )
14
15
class HitboxIE(InfoExtractor):
    """Information extractor for recorded videos on hitbox.tv.

    Matches ``hitbox.tv/video/<numeric id>`` URLs.  Metadata is read from
    the ``api/media/video`` endpoint and the stream URL from the
    ``api/player/config/video`` endpoint.
    """

    IE_NAME = 'hitbox'
    _VALID_URL = r'https?://(?:www\.)?hitbox\.tv/video/(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://www.hitbox.tv/video/203213',
        'info_dict': {
            'id': '203213',
            'title': 'hitbox @ gamescom, Sub Button Hype extended, Giveaway - hitbox News Update with Oxy',
            'alt_title': 'hitboxlive - Aug 9th #6',
            'description': '',
            'ext': 'mp4',
            # Raw string: '\.' is not a valid escape in a normal literal.
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 215.1666,
            'resolution': 'HD 720p',
            'uploader': 'hitboxlive',
            'view_count': int,
            'timestamp': 1407576133,
            'upload_date': '20140809',
            'categories': ['Live Show'],
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }

    def _extract_metadata(self, url, video_id):
        """Fetch the media metadata JSON and map it to info-dict fields.

        ``url`` is the API endpoint prefix; the request is made to
        ``<url>/<video_id>``.  The entry is read from the ``video`` list
        when the response's ``media_type`` is ``'video'`` and from the
        ``livestream`` list otherwise.

        Returns a dict with the keys ``id``, ``title``, ``alt_title``,
        ``description``, ``ext``, ``thumbnails``, ``duration``,
        ``uploader``, ``view_count``, ``timestamp`` and ``categories``.
        ``thumbnails`` and ``categories`` are ``None`` when the response
        carries no usable value for them.

        Raises IndexError when the response contains no media entry.
        """
        thumb_base = 'https://edge.sf.hitbox.tv'
        metadata = self._download_json(
            '%s/%s' % (url, video_id), video_id)

        # Recorded videos and live streams keep their entry (and their
        # date field) under different keys.
        date = 'media_live_since'
        media_type = 'livestream'
        if metadata.get('media_type') == 'video':
            media_type = 'video'
            date = 'media_date_added'

        # The media entry is mandatory; without it there is nothing to
        # extract, so a missing entry is allowed to fail here.
        video_meta = metadata.get(media_type, [])[0]
        title = video_meta.get('media_status')
        alt_title = video_meta.get('media_title')
        description = clean_html(
            video_meta.get('media_description') or
            video_meta.get('media_description_md'))
        duration = float_or_none(video_meta.get('media_duration'))
        uploader = video_meta.get('media_user_name')
        views = int_or_none(video_meta.get('media_views'))
        timestamp = parse_iso8601(video_meta.get(date), ' ')

        # Optional fields: never emit [None] or crash on a missing value.
        category = video_meta.get('category_name')
        categories = [category] if category else None

        thumbs = []
        for key, width, height in (
                ('media_thumbnail', 320, 180),
                ('media_thumbnail_large', 768, 432)):
            path = video_meta.get(key)
            if path:
                thumbs.append({
                    'url': thumb_base + path,
                    'width': width,
                    'height': height,
                })

        return {
            'id': video_id,
            'title': title,
            'alt_title': alt_title,
            'description': description,
            'ext': 'mp4',
            'thumbnails': thumbs or None,
            'duration': duration,
            'uploader': uploader,
            'view_count': views,
            'timestamp': timestamp,
            'categories': categories,
        }

    def _real_extract(self, url):
        """Build the info dict for a recorded video URL.

        Combines the shared metadata with the stream ``url``, the label of
        the first listed bitrate as ``resolution`` (``None`` when no
        bitrate is listed) and the ``m3u8`` protocol.
        """
        video_id = self._match_id(url)

        metadata = self._extract_metadata(
            'https://www.hitbox.tv/api/media/video',
            video_id)

        player_config = self._download_json(
            'https://www.hitbox.tv/api/player/config/video/%s' % video_id,
            video_id)

        clip = player_config.get('clip')
        video_url = clip.get('url')
        # The resolution label is optional; tolerate an absent or empty
        # bitrate list instead of raising IndexError.
        bitrates = clip.get('bitrates') or []
        res = bitrates[0].get('label') if bitrates else None

        metadata['resolution'] = res
        metadata['url'] = video_url
        metadata['protocol'] = 'm3u8'

        return metadata
107
108
class HitboxLiveIE(HitboxIE):
    """Information extractor for live channels on hitbox.tv.

    Reuses the metadata extraction of ``HitboxIE`` against the
    ``api/media/live`` endpoint and builds one RTMP format per distinct
    CDN server and non-``Auto`` bitrate listed by the
    ``api/player/config/live`` endpoint.
    """

    IE_NAME = 'hitbox:live'
    # NOTE(review): the (?!video) lookahead also rejects channel names that
    # merely start with "video", and ``.+`` captures everything after the
    # host, including any trailing path or query string - confirm intent.
    _VALID_URL = r'https?://(?:www\.)?hitbox\.tv/(?!video)(?P<id>.+)'
    _TEST = {
        'url': 'http://www.hitbox.tv/dimak',
        'info_dict': {
            'id': 'dimak',
            'ext': 'mp4',
            'description': 'md5:c9f80fa4410bc588d7faa40003fc7d0e',
            'timestamp': int,
            'upload_date': compat_str,
            'title': compat_str,
            'uploader': 'Dimak',
        },
        'params': {
            # live
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Build the info dict for a live channel URL.

        Returns the shared metadata extended with ``formats``,
        ``is_live`` set to ``True`` and a title passed through
        ``_live_title``.  CDN entries without a ``netConnectionUrl`` and
        streams without a ``url`` are skipped.
        """
        video_id = self._match_id(url)

        metadata = self._extract_metadata(
            'https://www.hitbox.tv/api/media/live',
            video_id)

        player_config = self._download_json(
            'https://www.hitbox.tv/api/player/config/live/%s' % video_id,
            video_id)

        formats = []
        seen_servers = set()
        for cdn in player_config.get('cdns') or []:
            base_url = cdn.get('netConnectionUrl')
            # Several CDN entries may share one server; list each once.
            if not base_url or base_url in seen_servers:
                continue
            seen_servers.add(base_url)

            # The last two labels of the host name serve as a format note;
            # this is purely informational, so a non-matching URL must not
            # abort the extraction.
            host_match = re.search(r'.+\.([^\.]+\.[^\./]+)/.+', base_url)
            host = host_match.group(1) if host_match else None

            for stream in cdn.get('bitrates') or []:
                label = stream.get('label')
                stream_url = stream.get('url')
                # 'Auto' is excluded, as are entries with no stream path
                # (which would otherwise yield a '<base>/None' URL).
                if label == 'Auto' or not stream_url:
                    continue
                formats.append({
                    'url': '%s/%s' % (base_url, stream_url),
                    'ext': 'mp4',
                    'vbr': stream.get('bitrate'),
                    'resolution': label,
                    'rtmp_live': True,
                    'format_note': host,
                    'page_url': url,
                    'player_url': 'http://www.hitbox.tv/static/player/flowplayer/flowplayer.commercial-3.2.16.swf',
                })

        self._sort_formats(formats)
        metadata['formats'] = formats
        metadata['is_live'] = True
        metadata['title'] = self._live_title(metadata.get('title'))
        return metadata