[mgtv] Add new extractor (closes #9212)
author Yen Chi Hsuan <yan12125@gmail.com>
Thu, 21 Apr 2016 15:29:51 +0000 (23:29 +0800)
committer Yen Chi Hsuan <yan12125@gmail.com>
Thu, 21 Apr 2016 15:29:51 +0000 (23:29 +0800)
youtube_dl/extractor/extractors.py
youtube_dl/extractor/mgtv.py [new file with mode: 0644]

index 84bdf5e976f2b991b084c12055efb46e854ee3ba..d28f9e863b205a8de80754a554ea3fc4dd4b0180 100644 (file)
@@ -407,6 +407,7 @@ from .mdr import MDRIE
 from .metacafe import MetacafeIE
 from .metacritic import MetacriticIE
 from .mgoon import MgoonIE
+from .mgtv import MGTVIE
 from .minhateca import MinhatecaIE
 from .ministrygrid import MinistryGridIE
 from .minoto import MinotoIE
diff --git a/youtube_dl/extractor/mgtv.py b/youtube_dl/extractor/mgtv.py
new file mode 100644 (file)
index 0000000..8f16a8f
--- /dev/null
@@ -0,0 +1,76 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import int_or_none
+
+
+class MGTVIE(InfoExtractor):
+    """Information extractor for video pages on www.mgtv.com."""
+
+    # The dot before "html" is escaped so that only a literal ".html" suffix matches.
+    _VALID_URL = r'https?://www\.mgtv\.com/v/(?:[^/]+/)*(?P<id>\d+)\.html'
+
+    _TEST = {
+        'url': 'http://www.mgtv.com/v/1/290525/f/3116640.html',
+        # NOTE(review): empty checksum placeholder; presumably not checked
+        # because skip_download is set below - confirm or remove.
+        'md5': '',
+        'info_dict': {
+            'id': '3116640',
+            'ext': 'mp4',
+            'title': '我是歌手第四季双年巅峰会:韩红李玟“双王”领军对抗',
+            'description': '我是歌手第四季双年巅峰会',
+            'duration': 7461,
+            # Raw string: "\." is not a valid escape in a normal string literal.
+            'thumbnail': r're:^https?://.*\.jpg$',
+        },
+        'params': {
+            'skip_download': True,  # m3u8 download
+        },
+    }
+
+    # Maps the stream name reported by the API to (format_id, preference).
+    _FORMAT_MAP = {
+        '标清': ('Standard', 0),
+        '高清': ('High', 1),
+        '超清': ('SuperHigh', 2),
+    }
+
+    def _real_extract(self, url):
+        """Build the info dict for the video page at ``url``.
+
+        Queries the player API for the video metadata and stream list, then
+        fetches one JSON document per stream to obtain its media URL.
+        """
+        video_id = self._match_id(url)
+        api_data = self._download_json(
+            'http://v.api.mgtv.com/player/video', video_id,
+            query={'video_id': video_id})['data']
+        info = api_data['info']
+
+        formats = []
+        for idx, stream in enumerate(api_data['stream']):
+            format_name = stream.get('name')
+            format_id, preference = self._FORMAT_MAP.get(format_name, (None, None))
+            # Parenthesised on purpose: "%" binds tighter than "or", so without
+            # the parentheses the "#<idx>" fallback label was never used.
+            format_info = self._download_json(
+                stream['url'], video_id,
+                note='Download video info for format %s' % (format_id or '#%d' % idx))
+            formats.append({
+                'format_id': format_id,
+                'url': format_info['info'],
+                'ext': 'mp4',  # These are m3u8 playlists
+                'preference': preference,
+            })
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': info['title'].strip(),
+            'formats': formats,
+            'description': info.get('desc'),
+            'duration': int_or_none(info.get('duration')),
+            'thumbnail': info.get('thumb'),
+        }