Merge branch 'lecture2go' of https://github.com/nichdu/youtube-dl into nichdu-lecture2go
[youtube-dl] / youtube_dl / extractor / hitbox.py
index eab2749ecdce8c83984ba1ab2540391141dcb255..421f55bbeaed2c1249833e5136ff479557c1bccc 100644 (file)
@@ -1,14 +1,21 @@
 # coding: utf-8
 from __future__ import unicode_literals
+
 import re
 
 from .common import InfoExtractor
 from ..utils import (
-    unified_strdate,
+    clean_html,
+    parse_iso8601,
+    float_or_none,
+    int_or_none,
+    compat_str,
+    determine_ext,
 )
 
 
 class HitboxIE(InfoExtractor):
+    IE_NAME = 'hitbox'
     _VALID_URL = r'https?://(?:www\.)?hitbox\.tv/video/(?P<id>[0-9]+)'
     _TEST = {
         'url': 'http://www.hitbox.tv/video/203213',
@@ -16,13 +23,14 @@ class HitboxIE(InfoExtractor):
             'id': '203213',
             'title': 'hitbox @ gamescom, Sub Button Hype extended, Giveaway - hitbox News Update with Oxy',
             'alt_title': 'hitboxlive - Aug 9th #6',
-            'description': '\n',
+            'description': '',
             'ext': 'mp4',
             'thumbnail': 're:^https?://.*\.jpg$',
-            'duration': 215,
+            'duration': 215.1666,
             'resolution': 'HD 720p',
-            'uploader_id': 'hitboxlive',
+            'uploader': 'hitboxlive',
             'view_count': int,
+            'timestamp': 1407576133,
             'upload_date': '20140809',
             'categories': ['Live Show'],
         },
@@ -35,8 +43,8 @@ class HitboxIE(InfoExtractor):
     def _extract_metadata(self, url, video_id):
         thumb_base = 'https://edge.sf.hitbox.tv'
         metadata = self._download_json(
-            '%s/%s' % (url, video_id), video_id
-        )
+            '%s/%s' % (url, video_id), video_id,
+            'Downloading metadata JSON')
 
         date = 'media_live_since'
         media_type = 'livestream'
@@ -47,11 +55,13 @@ class HitboxIE(InfoExtractor):
         video_meta = metadata.get(media_type, [])[0]
         title = video_meta.get('media_status')
         alt_title = video_meta.get('media_title')
-        description = video_meta.get('media_description_md')
-        duration = int(float(video_meta.get('media_duration')))
+        description = clean_html(
+            video_meta.get('media_description') or
+            video_meta.get('media_description_md'))
+        duration = float_or_none(video_meta.get('media_duration'))
         uploader = video_meta.get('media_user_name')
-        views = int(video_meta.get('media_views'))
-        upload_date = unified_strdate(video_meta.get(date))
+        views = int_or_none(video_meta.get('media_views'))
+        timestamp = parse_iso8601(video_meta.get(date), ' ')
         categories = [video_meta.get('category_name')]
         thumbs = [
             {'url': thumb_base + video_meta.get('media_thumbnail'),
@@ -70,47 +80,67 @@ class HitboxIE(InfoExtractor):
             'ext': 'mp4',
             'thumbnails': thumbs,
             'duration': duration,
-            'uploader_id': uploader,
+            'uploader': uploader,
             'view_count': views,
-            'upload_date': upload_date,
+            'timestamp': timestamp,
             'categories': categories,
         }
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
-        metadata = self._extract_metadata(
-            'https://www.hitbox.tv/api/media/video',
-            video_id
-        )
-
         player_config = self._download_json(
-            'https://www.hitbox.tv/api/player/config/video/%s' % (video_id),
-            video_id
-        )
+            'https://www.hitbox.tv/api/player/config/video/%s' % video_id,
+            video_id, 'Downloading video JSON')
 
-        clip = player_config.get('clip')
-        video_url = clip.get('url')
-        res = clip.get('bitrates', [])[0].get('label')
+        formats = []
+        for video in player_config['clip']['bitrates']:
+            label = video.get('label')
+            if label == 'Auto':
+                continue
+            video_url = video.get('url')
+            if not video_url:
+                continue
+            bitrate = int_or_none(video.get('bitrate'))
+            if determine_ext(video_url) == 'm3u8':
+                if not video_url.startswith('http'):
+                    continue
+                formats.append({
+                    'url': video_url,
+                    'ext': 'mp4',
+                    'tbr': bitrate,
+                    'format_note': label,
+                    'protocol': 'm3u8_native',
+                })
+            else:
+                formats.append({
+                    'url': video_url,
+                    'tbr': bitrate,
+                    'format_note': label,
+                })
+        self._sort_formats(formats)
 
-        metadata['resolution'] = res
-        metadata['url'] = video_url
-        metadata['protocol'] = 'm3u8'
+        metadata = self._extract_metadata(
+            'https://www.hitbox.tv/api/media/video',
+            video_id)
+        metadata['formats'] = formats
 
         return metadata
 
 
 class HitboxLiveIE(HitboxIE):
+    IE_NAME = 'hitbox:live'
     _VALID_URL = r'https?://(?:www\.)?hitbox\.tv/(?!video)(?P<id>.+)'
     _TEST = {
         'url': 'http://www.hitbox.tv/dimak',
         'info_dict': {
             'id': 'dimak',
             'ext': 'mp4',
-            'description': str,
-            'upload_date': str,
-            'title': str,
-            'uploader_id': 'Dimak',
+            'description': 'md5:c9f80fa4410bc588d7faa40003fc7d0e',
+            'timestamp': int,
+            'upload_date': compat_str,
+            'title': compat_str,
+            'uploader': 'Dimak',
         },
         'params': {
             # live
@@ -121,15 +151,9 @@ class HitboxLiveIE(HitboxIE):
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
-        metadata = self._extract_metadata(
-            'https://www.hitbox.tv/api/media/live',
-            video_id
-        )
-
         player_config = self._download_json(
-            'https://www.hitbox.tv/api/player/config/live/%s' % (video_id),
-            video_id
-        )
+            'https://www.hitbox.tv/api/player/config/live/%s' % video_id,
+            video_id)
 
         formats = []
         cdns = player_config.get('cdns')
@@ -141,20 +165,39 @@ class HitboxLiveIE(HitboxIE):
                 servers.append(base_url)
                 for stream in cdn.get('bitrates'):
                     label = stream.get('label')
-                    if label != 'Auto':
+                    if label == 'Auto':
+                        continue
+                    stream_url = stream.get('url')
+                    if not stream_url:
+                        continue
+                    bitrate = int_or_none(stream.get('bitrate'))
+                    if stream.get('provider') == 'hls' or determine_ext(stream_url) == 'm3u8':
+                        if not stream_url.startswith('http'):
+                            continue
+                        formats.append({
+                            'url': stream_url,
+                            'ext': 'mp4',
+                            'tbr': bitrate,
+                            'format_note': label,
+                            'rtmp_live': True,
+                        })
+                    else:
                         formats.append({
-                            'url': '%s/%s' % (base_url, stream.get('url')),
+                            'url': '%s/%s' % (base_url, stream_url),
                             'ext': 'mp4',
-                            'vbr': stream.get('bitrate'),
-                            'resolution': label,
+                            'tbr': bitrate,
                             'rtmp_live': True,
                             'format_note': host,
                             'page_url': url,
                             'player_url': 'http://www.hitbox.tv/static/player/flowplayer/flowplayer.commercial-3.2.16.swf',
                         })
-
         self._sort_formats(formats)
+
+        metadata = self._extract_metadata(
+            'https://www.hitbox.tv/api/media/live',
+            video_id)
         metadata['formats'] = formats
         metadata['is_live'] = True
         metadata['title'] = self._live_title(metadata.get('title'))
+
         return metadata