[hitbox] add support for live streams
authort0mm0 <dev@onairsoftware.co.uk>
Mon, 29 Dec 2014 20:10:59 +0000 (20:10 +0000)
committert0mm0 <dev@onairsoftware.co.uk>
Mon, 29 Dec 2014 20:12:23 +0000 (20:12 +0000)
youtube_dl/extractor/__init__.py
youtube_dl/extractor/hitbox.py

index 3300dfeb48e7cbaf6369ca04ba582946e09cd194..4f5a1ce18fc9aa5f4ae9c75b4e0dfba190e94514 100644 (file)
@@ -172,7 +172,7 @@ from .heise import HeiseIE
 from .hellporno import HellPornoIE
 from .helsinki import HelsinkiIE
 from .hentaistigma import HentaiStigmaIE
 from .hellporno import HellPornoIE
 from .helsinki import HelsinkiIE
 from .hentaistigma import HentaiStigmaIE
-from .hitbox import HitboxIE
+from .hitbox import HitboxIE, HitboxLiveIE
 from .hornbunny import HornBunnyIE
 from .hostingbulk import HostingBulkIE
 from .hotnewhiphop import HotNewHipHopIE
 from .hornbunny import HornBunnyIE
 from .hostingbulk import HostingBulkIE
 from .hotnewhiphop import HotNewHipHopIE
index 239da3cd4738d27c62c990a7a72c1dea1ef26f07..eab2749ecdce8c83984ba1ab2540391141dcb255 100644 (file)
@@ -1,5 +1,6 @@
 # coding: utf-8
 from __future__ import unicode_literals
 # coding: utf-8
 from __future__ import unicode_literals
+import re
 
 from .common import InfoExtractor
 from ..utils import (
 
 from .common import InfoExtractor
 from ..utils import (
@@ -9,33 +10,13 @@ from ..utils import (
 
 class HitboxIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?hitbox\.tv/video/(?P<id>[0-9]+)'
 
 class HitboxIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?hitbox\.tv/video/(?P<id>[0-9]+)'
-    _TESTS = [{
-        'url': 'http://www.hitbox.tv/video/358062',
-        'info_dict': {
-            'id': '358062',
-            'title': 'Megaman',
-            'alt_title': 'Megaman',
-            'description': '',
-            'ext': 'mp4',
-            'thumbnail': 're:^https?://.*\.jpg$',
-            'duration': 3834,
-            'resolution': 'SD 480p',
-            'uploader_id': 'supergreatfriend',
-            'view_count': int,
-            'upload_date': '20141225',
-            'categories': [None],
-        },
-        'params': {
-            # m3u8 download
-            'skip_download': True,
-        },
-    }, {
+    _TEST = {
         'url': 'http://www.hitbox.tv/video/203213',
         'info_dict': {
             'id': '203213',
             'title': 'hitbox @ gamescom, Sub Button Hype extended, Giveaway - hitbox News Update with Oxy',
             'alt_title': 'hitboxlive - Aug 9th #6',
         'url': 'http://www.hitbox.tv/video/203213',
         'info_dict': {
             'id': '203213',
             'title': 'hitbox @ gamescom, Sub Button Hype extended, Giveaway - hitbox News Update with Oxy',
             'alt_title': 'hitboxlive - Aug 9th #6',
-            'description': '',
+            'description': '\n',
             'ext': 'mp4',
             'thumbnail': 're:^https?://.*\.jpg$',
             'duration': 215,
             'ext': 'mp4',
             'thumbnail': 're:^https?://.*\.jpg$',
             'duration': 215,
@@ -49,24 +30,28 @@ class HitboxIE(InfoExtractor):
             # m3u8 download
             'skip_download': True,
         },
             # m3u8 download
             'skip_download': True,
         },
-    }]
-
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
+    }
 
 
+    def _extract_metadata(self, url, video_id):
         thumb_base = 'https://edge.sf.hitbox.tv'
         metadata = self._download_json(
         thumb_base = 'https://edge.sf.hitbox.tv'
         metadata = self._download_json(
-            'https://www.hitbox.tv/api/media/video/%s' % (video_id), video_id
+            '%s/%s' % (url, video_id), video_id
         )
 
         )
 
-        video_meta = metadata.get('video', [])[0]
+        date = 'media_live_since'
+        media_type = 'livestream'
+        if metadata.get('media_type') == 'video':
+            media_type = 'video'
+            date = 'media_date_added'
+
+        video_meta = metadata.get(media_type, [])[0]
         title = video_meta.get('media_status')
         alt_title = video_meta.get('media_title')
         title = video_meta.get('media_status')
         alt_title = video_meta.get('media_title')
-        description = video_meta.get('media_description')
+        description = video_meta.get('media_description_md')
         duration = int(float(video_meta.get('media_duration')))
         uploader = video_meta.get('media_user_name')
         views = int(video_meta.get('media_views'))
         duration = int(float(video_meta.get('media_duration')))
         uploader = video_meta.get('media_user_name')
         views = int(video_meta.get('media_views'))
-        upload_date = unified_strdate(video_meta.get('media_date_added'))
+        upload_date = unified_strdate(video_meta.get(date))
         categories = [video_meta.get('category_name')]
         thumbs = [
             {'url': thumb_base + video_meta.get('media_thumbnail'),
         categories = [video_meta.get('category_name')]
         thumbs = [
             {'url': thumb_base + video_meta.get('media_thumbnail'),
@@ -77,28 +62,99 @@ class HitboxIE(InfoExtractor):
              'height': 432},
         ]
 
              'height': 432},
         ]
 
-        player_config = self._download_json(
-            'https://www.hitbox.tv/api/player/config/video/%s' % (video_id),
-            video_id
-        )
-
-        clip = player_config.get('clip')
-        video_url = clip.get('url')
-        res = clip.get('bitrates', [])[0].get('label')
-
         return {
             'id': video_id,
             'title': title,
             'alt_title': alt_title,
             'description': description,
         return {
             'id': video_id,
             'title': title,
             'alt_title': alt_title,
             'description': description,
-            'url': video_url,
             'ext': 'mp4',
             'thumbnails': thumbs,
             'duration': duration,
             'ext': 'mp4',
             'thumbnails': thumbs,
             'duration': duration,
-            'resolution': res,
             'uploader_id': uploader,
             'view_count': views,
             'upload_date': upload_date,
             'categories': categories,
             'uploader_id': uploader,
             'view_count': views,
             'upload_date': upload_date,
             'categories': categories,
-            'protocol': 'm3u8',
         }
         }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        metadata = self._extract_metadata(
+            'https://www.hitbox.tv/api/media/video',
+            video_id
+        )
+
+        player_config = self._download_json(
+            'https://www.hitbox.tv/api/player/config/video/%s' % (video_id),
+            video_id
+        )
+
+        clip = player_config.get('clip')
+        video_url = clip.get('url')
+        res = clip.get('bitrates', [])[0].get('label')
+
+        metadata['resolution'] = res
+        metadata['url'] = video_url
+        metadata['protocol'] = 'm3u8'
+
+        return metadata
+
+
+class HitboxLiveIE(HitboxIE):
+    _VALID_URL = r'https?://(?:www\.)?hitbox\.tv/(?!video)(?P<id>.+)'
+    _TEST = {
+        'url': 'http://www.hitbox.tv/dimak',
+        'info_dict': {
+            'id': 'dimak',
+            'ext': 'mp4',
+            'description': str,
+            'upload_date': str,
+            'title': str,
+            'uploader_id': 'Dimak',
+        },
+        'params': {
+            # live
+            'skip_download': True,
+        },
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        metadata = self._extract_metadata(
+            'https://www.hitbox.tv/api/media/live',
+            video_id
+        )
+
+        player_config = self._download_json(
+            'https://www.hitbox.tv/api/player/config/live/%s' % (video_id),
+            video_id
+        )
+
+        formats = []
+        cdns = player_config.get('cdns')
+        servers = []
+        for cdn in cdns:
+            base_url = cdn.get('netConnectionUrl')
+            host = re.search('.+\.([^\.]+\.[^\./]+)/.+', base_url).group(1)
+            if base_url not in servers:
+                servers.append(base_url)
+                for stream in cdn.get('bitrates'):
+                    label = stream.get('label')
+                    if label != 'Auto':
+                        formats.append({
+                            'url': '%s/%s' % (base_url, stream.get('url')),
+                            'ext': 'mp4',
+                            'vbr': stream.get('bitrate'),
+                            'resolution': label,
+                            'rtmp_live': True,
+                            'format_note': host,
+                            'page_url': url,
+                            'player_url': 'http://www.hitbox.tv/static/player/flowplayer/flowplayer.commercial-3.2.16.swf',
+                        })
+
+        self._sort_formats(formats)
+        metadata['formats'] = formats
+        metadata['is_live'] = True
+        metadata['title'] = self._live_title(metadata.get('title'))
+        return metadata